diff options
Diffstat (limited to 'lib/chmlib')
-rw-r--r-- | lib/chmlib/AUTHORS | 18 | ||||
-rw-r--r-- | lib/chmlib/COPYING | 510 | ||||
-rw-r--r-- | lib/chmlib/INSTALL | 236 | ||||
-rw-r--r-- | lib/chmlib/Makefile.am | 3 | ||||
-rw-r--r-- | lib/chmlib/NEWS | 68 | ||||
-rw-r--r-- | lib/chmlib/README | 141 | ||||
-rw-r--r-- | lib/chmlib/chm_lib.c | 1917 | ||||
-rw-r--r-- | lib/chmlib/chm_lib.h | 144 | ||||
-rw-r--r-- | lib/chmlib/lzx.c | 812 | ||||
-rw-r--r-- | lib/chmlib/lzx.h | 62 |
10 files changed, 3911 insertions, 0 deletions
diff --git a/lib/chmlib/AUTHORS b/lib/chmlib/AUTHORS new file mode 100644 index 0000000..431ecc3 --- /dev/null +++ b/lib/chmlib/AUTHORS @@ -0,0 +1,18 @@ +Jed Wing <[email protected]> +includes modified LZX code from cabextract-0.5 by Stuart Caie. + +Thanks to: + iDEFENSE for reporting the stack overflow vulnerability. + Palasik Sandor for reporting and fixing the LZX buffer overrun vulnerability. + David Huseby for enhancements to the chm_enumerate functionality. + Vitaly Bursov for compilation fixes for x86-64. + Vadim Zeitlin for a patch to clean up and fix some deficiencies in the + configure script. + Stan Tobias for bugfixes and index-page improvement to chm_http. + Andrew Hodgetts for major portability improvement. + Rich Erwin for his work towards Windows CE support. + Pabs for bug fixes and suggestions. + Antony Dovgal for setting up autoconf/automake based build process. + Ragnar Hojland Espinosa for patches to make chm_http more useful. + Razvan Cojocaru for forwarding along information regarding building on OS X. + Anyone else I've forgotten. diff --git a/lib/chmlib/COPYING b/lib/chmlib/COPYING new file mode 100644 index 0000000..cc6f365 --- /dev/null +++ b/lib/chmlib/COPYING @@ -0,0 +1,510 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. +^L + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. +^L + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. +^L + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. +^L + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. +^L + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. +^L + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. +^L + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS +^L + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. +It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/lib/chmlib/INSTALL b/lib/chmlib/INSTALL new file mode 100644 index 0000000..23e5f25 --- /dev/null +++ b/lib/chmlib/INSTALL @@ -0,0 +1,236 @@ +Installation Instructions +************************* + +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005 Free +Software Foundation, Inc. + +This file is free documentation; the Free Software Foundation gives +unlimited permission to copy, distribute and modify it. + +Basic Installation +================== + +These are generic installation instructions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. (Caching is +disabled by default to prevent problems with accidental use of stale +cache files.) + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You only need +`configure.ac' if you want to change it or regenerate `configure' using +a newer version of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + +Some systems require unusual options for compilation or linking that the +`configure' script does not know about. Run `./configure --help' for +details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + +You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + If you have to use a `make' that does not support the `VPATH' +variable, you have to compile the package for one architecture at a +time in the source code directory. After you have installed the +package for one architecture, use `make distclean' before reconfiguring +for another architecture. + +Installation Names +================== + +By default, `make install' installs the package's commands under +`/usr/local/bin', include files under `/usr/local/include', etc. You +can specify an installation prefix other than `/usr/local' by giving +`configure' the option `--prefix=PREFIX'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +pass the option `--exec-prefix=PREFIX' to `configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + +Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + +There may be some features `configure' cannot figure out automatically, +but needs to determine by the type of machine the package will run on. +Usually, assuming the package is built to be run on the _same_ +architectures, `configure' can figure that out, but if it prints a +message saying it cannot guess the machine type, give it the +`--build=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option `--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. + +Sharing Defaults +================ + +If you want to set default values for `configure' scripts to share, you +can create a site shell script called `config.site' that gives default +values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + +Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified `gcc' to be used as the C compiler (unless it is +overridden in the site shell script). Here is a another example: + + /bin/bash ./configure CONFIG_SHELL=/bin/bash + +Here the `CONFIG_SHELL=/bin/bash' operand causes subsequent +configuration-related scripts to be executed by `/bin/bash'. + +`configure' Invocation +====================== + +`configure' recognizes the following options to control how it operates. + +`--help' +`-h' + Print a summary of the options to `configure', and exit. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. + diff --git a/lib/chmlib/Makefile.am b/lib/chmlib/Makefile.am new file mode 100644 index 0000000..bcbe427 --- /dev/null +++ b/lib/chmlib/Makefile.am @@ -0,0 +1,3 @@ +noinst_LTLIBRARIES = libchm.la +noinst_HEADERS = chm_lib.h lzx.h +libchm_la_SOURCES = chm_lib.c lzx.c diff --git a/lib/chmlib/NEWS b/lib/chmlib/NEWS new file mode 100644 index 0000000..332fd9a --- /dev/null +++ b/lib/chmlib/NEWS @@ -0,0 +1,68 @@ +Changes from 0.37 to 0.38 + - Fix for reading some chm files. Running over a large directory of chm + files, about 1% of them turned out to be unreadable. This resulted from + an incomplete understanding of one of the header fields (index_root). + Apparently, this can take negative values other than -1. + + - Security fix for extract_chmLib. Pathnames containing a ".." element + will not be extracted. There doesn't seem to be a legitimate reason to + use ".." as a path element in a chm file. + +Changes from 0.36 to 0.37 + - Major security fix for stack overflow vulnerability: + http://www.sven-tantau.de/public_files/chmlib/chmlib_20051126.txt + - Corrected the broken Makefile.in. + +Changes from 0.35 to 0.36 + - Major security fix (iDEFENSE Security Advisory IDEF1099 - Stack Overflow + Vulnerability) + - Major security fix from Palasik Sandor (LZX decompression buffer overrun) + - Bugfix/enhancement from David Huseby to make the "what" flags to + chm_enumerate work correctly, and to pass the flags along to the callback + function (via the chmUnitInfo structure) so that the callback doesn't + need to re-parse the filename. + - Compilation fixes for x86-64 from Vitaly V. Bursov. + - Miscellaneous fixes to the configure script, including some significant + cleanup by Vadim Zeitlin. The changes from Vadim should also allow the + configure script to correctly configure the build on OS X, where it was + previously failing to note that pread64 doesn't work. + - Minor update to the Makefile.in to do a mkdir before the install, in case + the specified INSTALLPREFIX directory is non-existent + +Changes from 0.32 to 0.35 + - UTF-8 filenames, while still not handled correctly, are handled a little + more gracefully. That is to say, the library doesn't fail to open files + with filenames using characters outside the ASCII subset. I'm very + interested in any information as to the "right" way to handle filenames + of this sort. + - Files not containing a compressed section are handled properly, such as + .chw files. These files seem to contain information about compression, + but the information is invalid or empty. The library deals gracefully + with this now. + - Files compressed with different options were not being decompressed + properly. In particular, if the "reset interval" for the compressed + section was other than 2 block sizes, it could fail to read some of the + files. + - The caching system was improved slightly, in conjunction with this + previous bugfix. + +Changes from 0.3 to 0.32: + - [Rich Erwin] Minor portability fixes for Windows CE. + - [Pabs] Minor bugfix regarding detecting directory entries versus empty files. + - [Antony Dovgal] autoconf-based build process + - [Ragnar Hojland Espinosa] Feature additions for chm_http: + * Use SO_REUSEADDR + * Allow --bind= and --port= command line arguments + - Simple makefile has been fixed (finally) to use gcc instead of gcc-3.2. (Sorry, everybody!) + +Changes from 0.2 to 0.3: + + - initial attempt at portability to Win32. + - bugfixes from Stan Tobias: + * memory corruption error with caching system + * case insensitivity, to match with the Windows semantics + - modification to chm_http by Stan Tobias: + * when the user requests the page '/', they get a page with links to + all of the files in the archive + - Andrew Hodgetts has ported the library to Solaris and Irix. See README for details. + - Stuart Caie has granted permission to relicense under the LGPL. diff --git a/lib/chmlib/README b/lib/chmlib/README new file mode 100644 index 0000000..d39924b --- /dev/null +++ b/lib/chmlib/README @@ -0,0 +1,141 @@ +CHMLIB 0.38 +=========== + +------- +SUMMARY +------- +chmlib is a small library designed for accessing MS ITSS files. The ITSS file +format is used for Microsoft Html Help files (.chm), which have been the +predominant medium for software documentation from Microsoft during the past +several years, having superceded the previously used .hlp file format. + +Note that this is NOT the same as the OLE structured storage file format used +by MS Excel, Word, and so on. Instead, it is a different file format which +fulfills a similar purpose. Both file formats may be accessed via instances +of the IStorage COM interface, which is essentially an "acts like a +filesystem" interface. + +------- +FILE FORMAT SUPPORT +------- + +Lookup of files in the archive is supported, and should be relatively quick. +Reading of files in the archive is also supported. +Writing is not supported, but may be added in the future. + +In terms of support for the ITSS file format, there are a few places in which +the support provided by this library is not fully general: + +1. ITSS files whose names contain UTF-8 characters which are not part of the + ASCII subset will not currently be dealt with gracefully. Currently, the + filenames are not converted from UTF-8, but are instead returned as-is. I'm + very interested in hearing any suggestions as to the "right" way to handle + this. + +2. Only version 3 ITSS files are supported at present, though some work has + gone towards divining the differences between different versions of the + file format. It is possible that version 2 ITSS files might work properly + with this library, but unconfirmed. + +3. Archives larger than 4 GB should be supported just fine, but if they + contain files larger than 4GB, this library may break. Fortunately, this + seems somewhat unlikely. + +If you run into .chm files (or files you suspect are ITSS files) that this +library doesn't work with, please contact me so I can fix the library. + +------- +PORTABILITY +------- + +This software was originally developed on a Intel x86 Debian Linux machine, +using gcc 3.0. It has since been compiled on various flavors of RedHat as +well, and using versions of gcc from 2.95 through 3.2. Basic Win32 support +should be in this release. (While a port to Win32 may _seem_ like a funny +idea, given that Windows ships with libraries for CHM access, it turns out that +Win CE does not; I've gotten a request for a port to Windows CE.) + +Chmlib apparently works on OS X, with some tweaks. In particular, disabling +pread and io64 apparently works. + +Finally, Andrew Hodgetts has ported to Solaris and IRIX: + +On Monday, 7 Oct 2002, Andrew Hodgetts wrote: +> Solaris(Sun): +> +> I used both SunProC and GCC on the solaris machines to compile. They +> both worked ok. +> However, both required -lsocket on the link line of the Makefile or you +> recieve linking errors. +> +> I have this working on CPUs ranging from Sun4m (Sparc5) through to +> UltraSparcIII (SunFireV880). +> +> Irix (SGI): +> +> I only testing with the MIPS compiler (not GCC). All worked ok - lots of +> warning messages, but it always does that. + +He further noted that: + +> ... for NON GCC compilers, a little tweaking may be required, but nothing too +> complex. ie SunProC doesn't understand -fPIC for library building. Irix +> required -n32 (new 32bit libraries) etc. These are things that someone who +> uses the OS and compiler should be used to dealing with. + +------- +CREDITS +------- + +* Stuart Caie: the LZX decompression code, and for granting permission to + re-license under the LGPL. + +* Sven Tantau: identification of a stack-overflow security flaw and a quick fix + for the problem; identification of a possible security danger in the example + program "extract_chmLib" + +* iDEFENSE Labs: identification of a nasty stack-overflow security flaw + +* Palasik Sandor: identification of a potential security flaw in lzx.c as well + as a quick fix for the problem + +* David Huseby: An excellent patch to the chm_enumerate functionality, relating + to the "what" flags, which didn't work entirely correctly before + +* Vadim Zeitlin: Configure script cleanup, including an important update to + allow detection of platforms where pread64 doesn't work. (OS X) + +* Vitaly V. Bursov: Compilation on x86-64. + +* mc: A suggestion to add a "mkdir" to the install step. + +* Stan Tobias: bugfixes and the added 'index page' feature of chm_http. + +* Andrew Hodgetts: porting to Solaris and IRIX, as well as fixing some + little-endian biases in the code. + +* Rich Erwin: Windows CE support. + +* Pabs: bug fixes and suggestions. + +* Antony Dovgal: setting up autoconf/automake based build process. + +* Ragnar Hojland Espinosa: patches to make chm_http more useful. + +* Razvan Cojocaru: forwarding along information regarding building on OS X. + +* Julien Lemoine: creating and maintaining the Debian package of chmlib. + +* Prarit Bhargava: Compilation on ia64 + +* Jean-Marc Vanel: elimination of compilation warnings in extract_chmLib + +* Sisyphus & Matej Spiller-Muys: Compilation under MinGW32 + +* Kyle Davenport: helper script for using chm_http with mozilla + +* Matthew Daniel & Mark Rosenstand: help to sort out issues with the build + system. + +* Anyone else I've forgotten. (?) + diff --git a/lib/chmlib/chm_lib.c b/lib/chmlib/chm_lib.c new file mode 100644 index 0000000..e7da0b0 --- /dev/null +++ b/lib/chmlib/chm_lib.c @@ -0,0 +1,1917 @@ +/* $Id: chm_lib.c 148 2007-03-07 09:22:36Z gyunaev $ */ +/*************************************************************************** + * chm_lib.c - CHM archive manipulation routines * + * ------------------- * + * * + * author: Jed Wing <[email protected]> * + * version: 0.3 * + * notes: These routines are meant for the manipulation of microsoft * + * .chm (compiled html help) files, but may likely be used * + * for the manipulation of any ITSS archive, if ever ITSS * + * archives are used for any other purpose. * + * * + * Note also that the section names are statically handled. * + * To be entirely correct, the section names should be read * + * from the section names meta-file, and then the various * + * content sections and the "transforms" to apply to the data * + * they contain should be inferred from the section name and * + * the meta-files referenced using that name; however, all of * + * the files I've been able to get my hands on appear to have * + * only two sections: Uncompressed and MSCompressed. * + * Additionally, the ITSS.DLL file included with Windows does * + * not appear to handle any different transforms than the * + * simple LZX-transform. Furthermore, the list of transforms * + * to apply is broken, in that only half the required space * + * is allocated for the list. (It appears as though the * + * space is allocated for ASCII strings, but the strings are * + * written as unicode. As a result, only the first half of * + * the string appears.) So this is probably not too big of * + * a deal, at least until CHM v4 (MS .lit files), which also * + * incorporate encryption, of some description. * + * * + * switches: CHM_MT: compile library with thread-safety * + * * + * switches (Linux only): * + * CHM_USE_PREAD: compile library to use pread instead of * + * lseek/read * + * CHM_USE_IO64: compile library to support full 64-bit I/O * + * as is needed to properly deal with the * + * 64-bit file offsets. * + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU Lesser General Public License as * + * published by the Free Software Foundation; either version 2.1 of the * + * License, or (at your option) any later version. * + * * + ***************************************************************************/ + +#include "chm_lib.h" + +#ifdef CHM_MT +#define _REENTRANT +#endif + +#include "lzx.h" + +#include <stdlib.h> +#include <string.h> +#ifdef CHM_DEBUG +#include <stdio.h> +#endif + +#if __sun || __sgi +#include <strings.h> +#endif + +#ifdef WIN32 +#include <windows.h> +#include <malloc.h> +#ifdef _WIN32_WCE +#define strcasecmp _stricmp +#define strncasecmp _strnicmp +#else +#define strcasecmp stricmp +#define strncasecmp strnicmp +#endif +#else +/* basic Linux system includes */ +#define _XOPEN_SOURCE 500 +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +/* #include <dmalloc.h> */ +#endif + +/* includes/defines for threading, if using them */ +#ifdef CHM_MT +#ifdef WIN32 +#define CHM_ACQUIRE_LOCK(a) do { \ + EnterCriticalSection(&(a)); \ + } while(0) +#define CHM_RELEASE_LOCK(a) do { \ + EnterCriticalSection(&(a)); \ + } while(0) + +#else +#include <pthread.h> + +#define CHM_ACQUIRE_LOCK(a) do { \ + pthread_mutex_lock(&(a)); \ + } while(0) +#define CHM_RELEASE_LOCK(a) do { \ + pthread_mutex_unlock(&(a)); \ + } while(0) + +#endif +#else +#define CHM_ACQUIRE_LOCK(a) /* do nothing */ +#define CHM_RELEASE_LOCK(a) /* do nothing */ +#endif + +#ifdef WIN32 +#define CHM_NULL_FD (INVALID_HANDLE_VALUE) +#define CHM_USE_WIN32IO 1 +#define CHM_CLOSE_FILE(fd) CloseHandle((fd)) +#else +#define CHM_NULL_FD (-1) +#define CHM_CLOSE_FILE(fd) close((fd)) +#endif + +/* + * defines related to tuning + */ +#ifndef CHM_MAX_BLOCKS_CACHED +#define CHM_MAX_BLOCKS_CACHED 5 +#endif + +/* + * architecture specific defines + * + * Note: as soon as C99 is more widespread, the below defines should + * probably just use the C99 sized-int types. + * + * The following settings will probably work for many platforms. The sizes + * don't have to be exactly correct, but the types must accommodate at least as + * many bits as they specify. + */ + +/* i386, 32-bit, Windows */ +#ifdef WIN32 +typedef unsigned char UChar; +typedef __int16 Int16; +typedef unsigned __int16 UInt16; +typedef __int32 Int32; +typedef unsigned __int32 UInt32; +typedef __int64 Int64; +typedef unsigned __int64 UInt64; + +/* I386, 32-bit, non-Windows */ +/* Sparc */ +/* MIPS */ +/* PPC */ +#elif __i386__ || __sun || __sgi || __ppc__ +typedef unsigned char UChar; +typedef short Int16; +typedef unsigned short UInt16; +typedef long Int32; +typedef unsigned long UInt32; +typedef long long Int64; +typedef unsigned long long UInt64; + +/* x86-64 */ +/* Note that these may be appropriate for other 64-bit machines. */ +#elif __x86_64__ || __ia64__ +typedef unsigned char UChar; +typedef short Int16; +typedef unsigned short UInt16; +typedef int Int32; +typedef unsigned int UInt32; +typedef long Int64; +typedef unsigned long UInt64; + +#else + +/* yielding an error is preferable to yielding incorrect behavior */ +#error "Please define the sized types for your platform in chm_lib.c" +#endif + +/* GCC */ +#ifdef __GNUC__ +#define memcmp __builtin_memcmp +#define memcpy __builtin_memcpy +#define strlen __builtin_strlen + +#elif defined(WIN32) +static int ffs(unsigned int val) +{ + int bit=1, idx=1; + while (bit != 0 && (val & bit) == 0) + { + bit <<= 1; + ++idx; + } + if (bit == 0) + return 0; + else + return idx; +} + +#endif + +/* utilities for unmarshalling data */ +static int _unmarshal_char_array(unsigned char **pData, + unsigned int *pLenRemain, + char *dest, + int count) +{ + if (count <= 0 || (unsigned int)count > *pLenRemain) + return 0; + memcpy(dest, (*pData), count); + *pData += count; + *pLenRemain -= count; + return 1; +} + +static int _unmarshal_uchar_array(unsigned char **pData, + unsigned int *pLenRemain, + unsigned char *dest, + int count) +{ + if (count <= 0 || (unsigned int)count > *pLenRemain) + return 0; + memcpy(dest, (*pData), count); + *pData += count; + *pLenRemain -= count; + return 1; +} + +#if 0 +static int _unmarshal_int16(unsigned char **pData, + unsigned int *pLenRemain, + Int16 *dest) +{ + if (2 > *pLenRemain) + return 0; + *dest = (*pData)[0] | (*pData)[1]<<8; + *pData += 2; + *pLenRemain -= 2; + return 1; +} + +static int _unmarshal_uint16(unsigned char **pData, + unsigned int *pLenRemain, + UInt16 *dest) +{ + if (2 > *pLenRemain) + return 0; + *dest = (*pData)[0] | (*pData)[1]<<8; + *pData += 2; + *pLenRemain -= 2; + return 1; +} +#endif + +static int _unmarshal_int32(unsigned char **pData, + unsigned int *pLenRemain, + Int32 *dest) +{ + if (4 > *pLenRemain) + return 0; + *dest = (*pData)[0] | (*pData)[1]<<8 | (*pData)[2]<<16 | (*pData)[3]<<24; + *pData += 4; + *pLenRemain -= 4; + return 1; +} + +static int _unmarshal_uint32(unsigned char **pData, + unsigned int *pLenRemain, + UInt32 *dest) +{ + if (4 > *pLenRemain) + return 0; + *dest = (*pData)[0] | (*pData)[1]<<8 | (*pData)[2]<<16 | (*pData)[3]<<24; + *pData += 4; + *pLenRemain -= 4; + return 1; +} + +static int _unmarshal_int64(unsigned char **pData, + unsigned int *pLenRemain, + Int64 *dest) +{ + Int64 temp; + int i; + if (8 > *pLenRemain) + return 0; + temp=0; + for(i=8; i>0; i--) + { + temp <<= 8; + temp |= (*pData)[i-1]; + } + *dest = temp; + *pData += 8; + *pLenRemain -= 8; + return 1; +} + +static int _unmarshal_uint64(unsigned char **pData, + unsigned int *pLenRemain, + UInt64 *dest) +{ + UInt64 temp; + int i; + if (8 > *pLenRemain) + return 0; + temp=0; + for(i=8; i>0; i--) + { + temp <<= 8; + temp |= (*pData)[i-1]; + } + *dest = temp; + *pData += 8; + *pLenRemain -= 8; + return 1; +} + +static int _unmarshal_uuid(unsigned char **pData, + unsigned int *pDataLen, + unsigned char *dest) +{ + return _unmarshal_uchar_array(pData, pDataLen, dest, 16); +} + +/* names of sections essential to decompression */ +static const char _CHMU_RESET_TABLE[] = + "::DataSpace/Storage/MSCompressed/Transform/" + "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/" + "InstanceData/ResetTable"; +static const char _CHMU_LZXC_CONTROLDATA[] = + "::DataSpace/Storage/MSCompressed/ControlData"; +static const char _CHMU_CONTENT[] = + "::DataSpace/Storage/MSCompressed/Content"; +static const char _CHMU_SPANINFO[] = + "::DataSpace/Storage/MSCompressed/SpanInfo"; + +/* + * structures local to this module + */ + +/* structure of ITSF headers */ +#define _CHM_ITSF_V2_LEN (0x58) +#define _CHM_ITSF_V3_LEN (0x60) +struct chmItsfHeader +{ + char signature[4]; /* 0 (ITSF) */ + Int32 version; /* 4 */ + Int32 header_len; /* 8 */ + Int32 unknown_000c; /* c */ + UInt32 last_modified; /* 10 */ + UInt32 lang_id; /* 14 */ + UChar dir_uuid[16]; /* 18 */ + UChar stream_uuid[16]; /* 28 */ + UInt64 unknown_offset; /* 38 */ + UInt64 unknown_len; /* 40 */ + UInt64 dir_offset; /* 48 */ + UInt64 dir_len; /* 50 */ + UInt64 data_offset; /* 58 (Not present before V3) */ +}; /* __attribute__ ((aligned (1))); */ + +static int _unmarshal_itsf_header(unsigned char **pData, + unsigned int *pDataLen, + struct chmItsfHeader *dest) +{ + /* we only know how to deal with the 0x58 and 0x60 byte structures */ + if (*pDataLen != _CHM_ITSF_V2_LEN && *pDataLen != _CHM_ITSF_V3_LEN) + return 0; + + /* unmarshal common fields */ + _unmarshal_char_array(pData, pDataLen, dest->signature, 4); + _unmarshal_int32 (pData, pDataLen, &dest->version); + _unmarshal_int32 (pData, pDataLen, &dest->header_len); + _unmarshal_int32 (pData, pDataLen, &dest->unknown_000c); + _unmarshal_uint32 (pData, pDataLen, &dest->last_modified); + _unmarshal_uint32 (pData, pDataLen, &dest->lang_id); + _unmarshal_uuid (pData, pDataLen, dest->dir_uuid); + _unmarshal_uuid (pData, pDataLen, dest->stream_uuid); + _unmarshal_uint64 (pData, pDataLen, &dest->unknown_offset); + _unmarshal_uint64 (pData, pDataLen, &dest->unknown_len); + _unmarshal_uint64 (pData, pDataLen, &dest->dir_offset); + _unmarshal_uint64 (pData, pDataLen, &dest->dir_len); + + /* error check the data */ + /* XXX: should also check UUIDs, probably, though with a version 3 file, + * current MS tools do not seem to use them. + */ + if (memcmp(dest->signature, "ITSF", 4) != 0) + return 0; + if (dest->version == 2) + { + if (dest->header_len < _CHM_ITSF_V2_LEN) + return 0; + } + else if (dest->version == 3) + { + if (dest->header_len < _CHM_ITSF_V3_LEN) + return 0; + } + else + return 0; + + /* now, if we have a V3 structure, unmarshal the rest. + * otherwise, compute it + */ + if (dest->version == 3) + { + if (*pDataLen != 0) + _unmarshal_uint64(pData, pDataLen, &dest->data_offset); + else + return 0; + } + else + dest->data_offset = dest->dir_offset + dest->dir_len; + + return 1; +} + +/* structure of ITSP headers */ +#define _CHM_ITSP_V1_LEN (0x54) +struct chmItspHeader +{ + char signature[4]; /* 0 (ITSP) */ + Int32 version; /* 4 */ + Int32 header_len; /* 8 */ + Int32 unknown_000c; /* c */ + UInt32 block_len; /* 10 */ + Int32 blockidx_intvl; /* 14 */ + Int32 index_depth; /* 18 */ + Int32 index_root; /* 1c */ + Int32 index_head; /* 20 */ + Int32 unknown_0024; /* 24 */ + UInt32 num_blocks; /* 28 */ + Int32 unknown_002c; /* 2c */ + UInt32 lang_id; /* 30 */ + UChar system_uuid[16]; /* 34 */ + UChar unknown_0044[16]; /* 44 */ +}; /* __attribute__ ((aligned (1))); */ + +static int _unmarshal_itsp_header(unsigned char **pData, + unsigned int *pDataLen, + struct chmItspHeader *dest) +{ + /* we only know how to deal with a 0x54 byte structures */ + if (*pDataLen != _CHM_ITSP_V1_LEN) + return 0; + + /* unmarshal fields */ + _unmarshal_char_array(pData, pDataLen, dest->signature, 4); + _unmarshal_int32 (pData, pDataLen, &dest->version); + _unmarshal_int32 (pData, pDataLen, &dest->header_len); + _unmarshal_int32 (pData, pDataLen, &dest->unknown_000c); + _unmarshal_uint32 (pData, pDataLen, &dest->block_len); + _unmarshal_int32 (pData, pDataLen, &dest->blockidx_intvl); + _unmarshal_int32 (pData, pDataLen, &dest->index_depth); + _unmarshal_int32 (pData, pDataLen, &dest->index_root); + _unmarshal_int32 (pData, pDataLen, &dest->index_head); + _unmarshal_int32 (pData, pDataLen, &dest->unknown_0024); + _unmarshal_uint32 (pData, pDataLen, &dest->num_blocks); + _unmarshal_int32 (pData, pDataLen, &dest->unknown_002c); + _unmarshal_uint32 (pData, pDataLen, &dest->lang_id); + _unmarshal_uuid (pData, pDataLen, dest->system_uuid); + _unmarshal_uchar_array(pData, pDataLen, dest->unknown_0044, 16); + + /* error check the data */ + if (memcmp(dest->signature, "ITSP", 4) != 0) + return 0; + if (dest->version != 1) + return 0; + if (dest->header_len != _CHM_ITSP_V1_LEN) + return 0; + + return 1; +} + +/* structure of PMGL headers */ +static const char _chm_pmgl_marker[4] = "PMGL"; +#define _CHM_PMGL_LEN (0x14) +struct chmPmglHeader +{ + char signature[4]; /* 0 (PMGL) */ + UInt32 free_space; /* 4 */ + UInt32 unknown_0008; /* 8 */ + Int32 block_prev; /* c */ + Int32 block_next; /* 10 */ +}; /* __attribute__ ((aligned (1))); */ + +static int _unmarshal_pmgl_header(unsigned char **pData, + unsigned int *pDataLen, + struct chmPmglHeader *dest) +{ + /* we only know how to deal with a 0x14 byte structures */ + if (*pDataLen != _CHM_PMGL_LEN) + return 0; + + /* unmarshal fields */ + _unmarshal_char_array(pData, pDataLen, dest->signature, 4); + _unmarshal_uint32 (pData, pDataLen, &dest->free_space); + _unmarshal_uint32 (pData, pDataLen, &dest->unknown_0008); + _unmarshal_int32 (pData, pDataLen, &dest->block_prev); + _unmarshal_int32 (pData, pDataLen, &dest->block_next); + + /* check structure */ + if (memcmp(dest->signature, _chm_pmgl_marker, 4) != 0) + return 0; + + return 1; +} + +/* structure of PMGI headers */ +static const char _chm_pmgi_marker[4] = "PMGI"; +#define _CHM_PMGI_LEN (0x08) +struct chmPmgiHeader +{ + char signature[4]; /* 0 (PMGI) */ + UInt32 free_space; /* 4 */ +}; /* __attribute__ ((aligned (1))); */ + +static int _unmarshal_pmgi_header(unsigned char **pData, + unsigned int *pDataLen, + struct chmPmgiHeader *dest) +{ + /* we only know how to deal with a 0x8 byte structures */ + if (*pDataLen != _CHM_PMGI_LEN) + return 0; + + /* unmarshal fields */ + _unmarshal_char_array(pData, pDataLen, dest->signature, 4); + _unmarshal_uint32 (pData, pDataLen, &dest->free_space); + + /* check structure */ + if (memcmp(dest->signature, _chm_pmgi_marker, 4) != 0) + return 0; + + return 1; +} + +/* structure of LZXC reset table */ +#define _CHM_LZXC_RESETTABLE_V1_LEN (0x28) +struct chmLzxcResetTable +{ + UInt32 version; + UInt32 block_count; + UInt32 unknown; + UInt32 table_offset; + UInt64 uncompressed_len; + UInt64 compressed_len; + UInt64 block_len; +}; /* __attribute__ ((aligned (1))); */ + +static int _unmarshal_lzxc_reset_table(unsigned char **pData, + unsigned int *pDataLen, + struct chmLzxcResetTable *dest) +{ + /* we only know how to deal with a 0x28 byte structures */ + if (*pDataLen != _CHM_LZXC_RESETTABLE_V1_LEN) + return 0; + + /* unmarshal fields */ + _unmarshal_uint32 (pData, pDataLen, &dest->version); + _unmarshal_uint32 (pData, pDataLen, &dest->block_count); + _unmarshal_uint32 (pData, pDataLen, &dest->unknown); + _unmarshal_uint32 (pData, pDataLen, &dest->table_offset); + _unmarshal_uint64 (pData, pDataLen, &dest->uncompressed_len); + _unmarshal_uint64 (pData, pDataLen, &dest->compressed_len); + _unmarshal_uint64 (pData, pDataLen, &dest->block_len); + + /* check structure */ + if (dest->version != 2) + return 0; + + return 1; +} + +/* structure of LZXC control data block */ +#define _CHM_LZXC_MIN_LEN (0x18) +#define _CHM_LZXC_V2_LEN (0x1c) +struct chmLzxcControlData +{ + UInt32 size; /* 0 */ + char signature[4]; /* 4 (LZXC) */ + UInt32 version; /* 8 */ + UInt32 resetInterval; /* c */ + UInt32 windowSize; /* 10 */ + UInt32 windowsPerReset; /* 14 */ + UInt32 unknown_18; /* 18 */ +}; + +static int _unmarshal_lzxc_control_data(unsigned char **pData, + unsigned int *pDataLen, + struct chmLzxcControlData *dest) +{ + /* we want at least 0x18 bytes */ + if (*pDataLen < _CHM_LZXC_MIN_LEN) + return 0; + + /* unmarshal fields */ + _unmarshal_uint32 (pData, pDataLen, &dest->size); + _unmarshal_char_array(pData, pDataLen, dest->signature, 4); + _unmarshal_uint32 (pData, pDataLen, &dest->version); + _unmarshal_uint32 (pData, pDataLen, &dest->resetInterval); + _unmarshal_uint32 (pData, pDataLen, &dest->windowSize); + _unmarshal_uint32 (pData, pDataLen, &dest->windowsPerReset); + + if (*pDataLen >= _CHM_LZXC_V2_LEN) + _unmarshal_uint32 (pData, pDataLen, &dest->unknown_18); + else + dest->unknown_18 = 0; + + if (dest->version == 2) + { + dest->resetInterval *= 0x8000; + dest->windowSize *= 0x8000; + } + if (dest->windowSize == 0 || dest->resetInterval == 0) + return 0; + + /* for now, only support resetInterval a multiple of windowSize/2 */ + if (dest->windowSize == 1) + return 0; + if ((dest->resetInterval % (dest->windowSize/2)) != 0) + return 0; + + /* check structure */ + if (memcmp(dest->signature, "LZXC", 4) != 0) + return 0; + + return 1; +} + +/* the structure used for chm file handles */ +struct chmFile +{ +#ifdef WIN32 + HANDLE fd; +#else + int fd; +#endif + +#ifdef CHM_MT +#ifdef WIN32 + CRITICAL_SECTION mutex; + CRITICAL_SECTION lzx_mutex; + CRITICAL_SECTION cache_mutex; +#else + pthread_mutex_t mutex; + pthread_mutex_t lzx_mutex; + pthread_mutex_t cache_mutex; +#endif +#endif + + UInt64 dir_offset; + UInt64 dir_len; + UInt64 data_offset; + Int32 index_root; + Int32 index_head; + UInt32 block_len; + + UInt64 span; + struct chmUnitInfo rt_unit; + struct chmUnitInfo cn_unit; + struct chmLzxcResetTable reset_table; + + /* LZX control data */ + int compression_enabled; + UInt32 window_size; + UInt32 reset_interval; + UInt32 reset_blkcount; + + /* decompressor state */ + struct LZXstate *lzx_state; + int lzx_last_block; + + /* cache for decompressed blocks */ + UChar **cache_blocks; + UInt64 *cache_block_indices; + Int32 cache_num_blocks; +}; + +/* + * utility functions local to this module + */ + +/* utility function to handle differences between {pread,read}(64)? */ +static Int64 _chm_fetch_bytes(struct chmFile *h, + UChar *buf, + UInt64 os, + Int64 len) +{ + Int64 readLen=0, oldOs=0; + if (h->fd == CHM_NULL_FD) + return readLen; + + CHM_ACQUIRE_LOCK(h->mutex); +#ifdef CHM_USE_WIN32IO + /* NOTE: this might be better done with CreateFileMapping, et cetera... */ + { + DWORD origOffsetLo=0, origOffsetHi=0; + DWORD offsetLo, offsetHi; + DWORD actualLen=0; + + /* awkward Win32 Seek/Tell */ + offsetLo = (unsigned int)(os & 0xffffffffL); + offsetHi = (unsigned int)((os >> 32) & 0xffffffffL); + origOffsetLo = SetFilePointer(h->fd, 0, &origOffsetHi, FILE_CURRENT); + offsetLo = SetFilePointer(h->fd, offsetLo, &offsetHi, FILE_BEGIN); + + /* read the data */ + if (ReadFile(h->fd, + buf, + (DWORD)len, + &actualLen, + NULL) == TRUE) + readLen = actualLen; + else + readLen = 0; + + /* restore original position */ + SetFilePointer(h->fd, origOffsetLo, &origOffsetHi, FILE_BEGIN); + } +#else +#ifdef CHM_USE_PREAD +#ifdef CHM_USE_IO64 + readLen = pread64(h->fd, buf, (long)len, os); +#else + readLen = pread(h->fd, buf, (long)len, (unsigned int)os); +#endif +#else +#ifdef CHM_USE_IO64 + oldOs = lseek64(h->fd, 0, SEEK_CUR); + lseek64(h->fd, os, SEEK_SET); + readLen = read(h->fd, buf, len); + lseek64(h->fd, oldOs, SEEK_SET); +#else + oldOs = lseek(h->fd, 0, SEEK_CUR); + lseek(h->fd, (long)os, SEEK_SET); + readLen = read(h->fd, buf, len); + lseek(h->fd, (long)oldOs, SEEK_SET); +#endif +#endif +#endif + CHM_RELEASE_LOCK(h->mutex); + return readLen; +} + +/* open an ITS archive */ +#ifdef PPC_BSTR +/* RWE 6/12/2003 */ +struct chmFile *chm_open(BSTR filename) +#else +struct chmFile *chm_open(const char *filename) +#endif +{ + unsigned char sbuffer[256]; + unsigned int sremain; + unsigned char *sbufpos; + struct chmFile *newHandle=NULL; + struct chmItsfHeader itsfHeader; + struct chmItspHeader itspHeader; +#if 0 + struct chmUnitInfo uiSpan; +#endif + struct chmUnitInfo uiLzxc; + struct chmLzxcControlData ctlData; + + /* allocate handle */ + newHandle = (struct chmFile *)malloc(sizeof(struct chmFile)); + if (newHandle == NULL) + return NULL; + newHandle->fd = CHM_NULL_FD; + newHandle->lzx_state = NULL; + newHandle->cache_blocks = NULL; + newHandle->cache_block_indices = NULL; + newHandle->cache_num_blocks = 0; + + /* open file */ +#ifdef WIN32 +#ifdef PPC_BSTR + if ((newHandle->fd=CreateFile(filename, + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL)) == CHM_NULL_FD) + { + free(newHandle); + return NULL; + } +#else + if ((newHandle->fd=CreateFileA(filename, + GENERIC_READ, + 0, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL)) == CHM_NULL_FD) + { + free(newHandle); + return NULL; + } +#endif +#else + if ((newHandle->fd=open(filename, O_RDONLY)) == CHM_NULL_FD) + { + free(newHandle); + return NULL; + } +#endif + + /* initialize mutexes, if needed */ +#ifdef CHM_MT +#ifdef WIN32 + InitializeCriticalSection(&newHandle->mutex); + InitializeCriticalSection(&newHandle->lzx_mutex); + InitializeCriticalSection(&newHandle->cache_mutex); +#else + pthread_mutex_init(&newHandle->mutex, NULL); + pthread_mutex_init(&newHandle->lzx_mutex, NULL); + pthread_mutex_init(&newHandle->cache_mutex, NULL); +#endif +#endif + + /* read and verify header */ + sremain = _CHM_ITSF_V3_LEN; + sbufpos = sbuffer; + if (_chm_fetch_bytes(newHandle, sbuffer, (UInt64)0, sremain) != sremain || + !_unmarshal_itsf_header(&sbufpos, &sremain, &itsfHeader)) + { + chm_close(newHandle); + return NULL; + } + + /* stash important values from header */ + newHandle->dir_offset = itsfHeader.dir_offset; + newHandle->dir_len = itsfHeader.dir_len; + newHandle->data_offset = itsfHeader.data_offset; + + /* now, read and verify the directory header chunk */ + sremain = _CHM_ITSP_V1_LEN; + sbufpos = sbuffer; + if (_chm_fetch_bytes(newHandle, sbuffer, + (UInt64)itsfHeader.dir_offset, sremain) != sremain || + !_unmarshal_itsp_header(&sbufpos, &sremain, &itspHeader)) + { + chm_close(newHandle); + return NULL; + } + + /* grab essential information from ITSP header */ + newHandle->dir_offset += itspHeader.header_len; + newHandle->dir_len -= itspHeader.header_len; + newHandle->index_root = itspHeader.index_root; + newHandle->index_head = itspHeader.index_head; + newHandle->block_len = itspHeader.block_len; + + /* if the index root is -1, this means we don't have any PMGI blocks. + * as a result, we must use the sole PMGL block as the index root + */ + if (newHandle->index_root <= -1) + newHandle->index_root = newHandle->index_head; + + /* By default, compression is enabled. */ + newHandle->compression_enabled = 1; + +/* Jed, Sun Jun 27: 'span' doesn't seem to be used anywhere?! */ +#if 0 + /* fetch span */ + if (CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle, + _CHMU_SPANINFO, + &uiSpan) || + uiSpan.space == CHM_COMPRESSED) + { + chm_close(newHandle); + return NULL; + } + + /* N.B.: we've already checked that uiSpan is in the uncompressed section, + * so this should not require attempting to decompress, which may + * rely on having a valid "span" + */ + sremain = 8; + sbufpos = sbuffer; + if (chm_retrieve_object(newHandle, &uiSpan, sbuffer, + 0, sremain) != sremain || + !_unmarshal_uint64(&sbufpos, &sremain, &newHandle->span)) + { + chm_close(newHandle); + return NULL; + } +#endif + + /* prefetch most commonly needed unit infos */ + if (CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle, + _CHMU_RESET_TABLE, + &newHandle->rt_unit) || + newHandle->rt_unit.space == CHM_COMPRESSED || + CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle, + _CHMU_CONTENT, + &newHandle->cn_unit) || + newHandle->cn_unit.space == CHM_COMPRESSED || + CHM_RESOLVE_SUCCESS != chm_resolve_object(newHandle, + _CHMU_LZXC_CONTROLDATA, + &uiLzxc) || + uiLzxc.space == CHM_COMPRESSED) + { + newHandle->compression_enabled = 0; + } + + /* read reset table info */ + if (newHandle->compression_enabled) + { + sremain = _CHM_LZXC_RESETTABLE_V1_LEN; + sbufpos = sbuffer; + if (chm_retrieve_object(newHandle, &newHandle->rt_unit, sbuffer, + 0, sremain) != sremain || + !_unmarshal_lzxc_reset_table(&sbufpos, &sremain, + &newHandle->reset_table)) + { + newHandle->compression_enabled = 0; + } + } + + /* read control data */ + if (newHandle->compression_enabled) + { + sremain = (unsigned int)uiLzxc.length; + if (uiLzxc.length > sizeof(sbuffer)) + { + chm_close(newHandle); + return NULL; + } + + sbufpos = sbuffer; + if (chm_retrieve_object(newHandle, &uiLzxc, sbuffer, + 0, sremain) != sremain || + !_unmarshal_lzxc_control_data(&sbufpos, &sremain, + &ctlData)) + { + newHandle->compression_enabled = 0; + } + + newHandle->window_size = ctlData.windowSize; + newHandle->reset_interval = ctlData.resetInterval; + +/* Jed, Mon Jun 28: Experimentally, it appears that the reset block count */ +/* must be multiplied by this formerly unknown ctrl data field in */ +/* order to decompress some files. */ +#if 0 + newHandle->reset_blkcount = newHandle->reset_interval / + (newHandle->window_size / 2); +#else + newHandle->reset_blkcount = newHandle->reset_interval / + (newHandle->window_size / 2) * + ctlData.windowsPerReset; +#endif + } + + /* initialize cache */ + chm_set_param(newHandle, CHM_PARAM_MAX_BLOCKS_CACHED, + CHM_MAX_BLOCKS_CACHED); + + return newHandle; +} + +/* close an ITS archive */ +void chm_close(struct chmFile *h) +{ + if (h != NULL) + { + if (h->fd != CHM_NULL_FD) + CHM_CLOSE_FILE(h->fd); + h->fd = CHM_NULL_FD; + +#ifdef CHM_MT +#ifdef WIN32 + DeleteCriticalSection(&h->mutex); + DeleteCriticalSection(&h->lzx_mutex); + DeleteCriticalSection(&h->cache_mutex); +#else + pthread_mutex_destroy(&h->mutex); + pthread_mutex_destroy(&h->lzx_mutex); + pthread_mutex_destroy(&h->cache_mutex); +#endif +#endif + + if (h->lzx_state) + LZXteardown(h->lzx_state); + h->lzx_state = NULL; + + if (h->cache_blocks) + { + int i; + for (i=0; i<h->cache_num_blocks; i++) + { + if (h->cache_blocks[i]) + free(h->cache_blocks[i]); + } + free(h->cache_blocks); + h->cache_blocks = NULL; + } + + if (h->cache_block_indices) + free(h->cache_block_indices); + h->cache_block_indices = NULL; + + free(h); + } +} + +/* + * set a parameter on the file handle. + * valid parameter types: + * CHM_PARAM_MAX_BLOCKS_CACHED: + * how many decompressed blocks should be cached? A simple + * caching scheme is used, wherein the index of the block is + * used as a hash value, and hash collision results in the + * invalidation of the previously cached block. + */ +void chm_set_param(struct chmFile *h, + int paramType, + int paramVal) +{ + switch (paramType) + { + case CHM_PARAM_MAX_BLOCKS_CACHED: + CHM_ACQUIRE_LOCK(h->cache_mutex); + if (paramVal != h->cache_num_blocks) + { + UChar **newBlocks; + UInt64 *newIndices; + int i; + + /* allocate new cached blocks */ + newBlocks = (UChar **)malloc(paramVal * sizeof (UChar *)); + if (newBlocks == NULL) return; + newIndices = (UInt64 *)malloc(paramVal * sizeof (UInt64)); + if (newIndices == NULL) { free(newBlocks); return; } + for (i=0; i<paramVal; i++) + { + newBlocks[i] = NULL; + newIndices[i] = 0; + } + + /* re-distribute old cached blocks */ + if (h->cache_blocks) + { + for (i=0; i<h->cache_num_blocks; i++) + { + int newSlot = (int)(h->cache_block_indices[i] % paramVal); + + if (h->cache_blocks[i]) + { + /* in case of collision, destroy newcomer */ + if (newBlocks[newSlot]) + { + free(h->cache_blocks[i]); + h->cache_blocks[i] = NULL; + } + else + { + newBlocks[newSlot] = h->cache_blocks[i]; + newIndices[newSlot] = + h->cache_block_indices[i]; + } + } + } + + free(h->cache_blocks); + free(h->cache_block_indices); + } + + /* now, set new values */ + h->cache_blocks = newBlocks; + h->cache_block_indices = newIndices; + h->cache_num_blocks = paramVal; + } + CHM_RELEASE_LOCK(h->cache_mutex); + break; + + default: + break; + } +} + +/* + * helper methods for chm_resolve_object + */ + +/* skip a compressed dword */ +static void _chm_skip_cword(UChar **pEntry) +{ + while (*(*pEntry)++ >= 0x80) + ; +} + +/* skip the data from a PMGL entry */ +static void _chm_skip_PMGL_entry_data(UChar **pEntry) +{ + _chm_skip_cword(pEntry); + _chm_skip_cword(pEntry); + _chm_skip_cword(pEntry); +} + +/* parse a compressed dword */ +static UInt64 _chm_parse_cword(UChar **pEntry) +{ + UInt64 accum = 0; + UChar temp; + while ((temp=*(*pEntry)++) >= 0x80) + { + accum <<= 7; + accum += temp & 0x7f; + } + + return (accum << 7) + temp; +} + +/* parse a utf-8 string into an ASCII char buffer */ +static int _chm_parse_UTF8(UChar **pEntry, UInt64 count, char *path) +{ + /* XXX: implement UTF-8 support, including a real mapping onto + * ISO-8859-1? probably there is a library to do this? As is + * immediately apparent from the below code, I'm presently not doing + * any special handling for files in which none of the strings contain + * UTF-8 multi-byte characters. + */ + while (count != 0) + { + *path++ = (char)(*(*pEntry)++); + --count; + } + + *path = '\0'; + return 1; +} + +/* parse a PMGL entry into a chmUnitInfo struct; return 1 on success. */ +static int _chm_parse_PMGL_entry(UChar **pEntry, struct chmUnitInfo *ui) +{ + UInt64 strLen; + + /* parse str len */ + strLen = _chm_parse_cword(pEntry); + if (strLen > CHM_MAX_PATHLEN) + return 0; + + /* parse path */ + if (! _chm_parse_UTF8(pEntry, strLen, ui->path)) + return 0; + + /* parse info */ + ui->space = (int)_chm_parse_cword(pEntry); + ui->start = _chm_parse_cword(pEntry); + ui->length = _chm_parse_cword(pEntry); + return 1; +} + +/* find an exact entry in PMGL; return NULL if we fail */ +static UChar *_chm_find_in_PMGL(UChar *page_buf, + UInt32 block_len, + const char *objPath) +{ + /* XXX: modify this to do a binary search using the nice index structure + * that is provided for us. + */ + struct chmPmglHeader header; + unsigned int hremain; + UChar *end; + UChar *cur; + UChar *temp; + UInt64 strLen; + char buffer[CHM_MAX_PATHLEN+1]; + + /* figure out where to start and end */ + cur = page_buf; + hremain = _CHM_PMGL_LEN; + if (! _unmarshal_pmgl_header(&cur, &hremain, &header)) + return NULL; + end = page_buf + block_len - (header.free_space); + + /* now, scan progressively */ + while (cur < end) + { + /* grab the name */ + temp = cur; + strLen = _chm_parse_cword(&cur); + if (strLen > CHM_MAX_PATHLEN) + return NULL; + if (! _chm_parse_UTF8(&cur, strLen, buffer)) + return NULL; + + /* check if it is the right name */ + if (! strcasecmp(buffer, objPath)) + return temp; + + _chm_skip_PMGL_entry_data(&cur); + } + + return NULL; +} + +/* find which block should be searched next for the entry; -1 if no block */ +static Int32 _chm_find_in_PMGI(UChar *page_buf, + UInt32 block_len, + const char *objPath) +{ + /* XXX: modify this to do a binary search using the nice index structure + * that is provided for us + */ + struct chmPmgiHeader header; + unsigned int hremain; + int page=-1; + UChar *end; + UChar *cur; + UInt64 strLen; + char buffer[CHM_MAX_PATHLEN+1]; + + /* figure out where to start and end */ + cur = page_buf; + hremain = _CHM_PMGI_LEN; + if (! _unmarshal_pmgi_header(&cur, &hremain, &header)) + return -1; + end = page_buf + block_len - (header.free_space); + + /* now, scan progressively */ + while (cur < end) + { + /* grab the name */ + strLen = _chm_parse_cword(&cur); + if (strLen > CHM_MAX_PATHLEN) + return -1; + if (! _chm_parse_UTF8(&cur, strLen, buffer)) + return -1; + + /* check if it is the right name */ + if (strcasecmp(buffer, objPath) > 0) + return page; + + /* load next value for path */ + page = (int)_chm_parse_cword(&cur); + } + + return page; +} + +/* resolve a particular object from the archive */ +int chm_resolve_object(struct chmFile *h, + const char *objPath, + struct chmUnitInfo *ui) +{ + /* + * XXX: implement caching scheme for dir pages + */ + + Int32 curPage; + + /* buffer to hold whatever page we're looking at */ + /* RWE 6/12/2003 */ + UChar *page_buf = malloc(h->block_len); + if (page_buf == NULL) + return CHM_RESOLVE_FAILURE; + + /* starting page */ + curPage = h->index_root; + + /* until we have either returned or given up */ + while (curPage != -1) + { + + /* try to fetch the index page */ + if (_chm_fetch_bytes(h, page_buf, + (UInt64)h->dir_offset + (UInt64)curPage*h->block_len, + h->block_len) != h->block_len) + { + free(page_buf); + return CHM_RESOLVE_FAILURE; + } + + /* now, if it is a leaf node: */ + if (memcmp(page_buf, _chm_pmgl_marker, 4) == 0) + { + /* scan block */ + UChar *pEntry = _chm_find_in_PMGL(page_buf, + h->block_len, + objPath); + if (pEntry == NULL) + { + free(page_buf); + return CHM_RESOLVE_FAILURE; + } + + /* parse entry and return */ + _chm_parse_PMGL_entry(&pEntry, ui); + free(page_buf); + return CHM_RESOLVE_SUCCESS; + } + + /* else, if it is a branch node: */ + else if (memcmp(page_buf, _chm_pmgi_marker, 4) == 0) + curPage = _chm_find_in_PMGI(page_buf, h->block_len, objPath); + + /* else, we are confused. give up. */ + else + { + free(page_buf); + return CHM_RESOLVE_FAILURE; + } + } + + /* didn't find anything. fail. */ + free(page_buf); + return CHM_RESOLVE_FAILURE; +} + +/* + * utility methods for dealing with compressed data + */ + +/* get the bounds of a compressed block. return 0 on failure */ +static int _chm_get_cmpblock_bounds(struct chmFile *h, + UInt64 block, + UInt64 *start, + Int64 *len) +{ + UChar buffer[8], *dummy; + unsigned int remain; + + /* for all but the last block, use the reset table */ + if (block < h->reset_table.block_count-1) + { + /* unpack the start address */ + dummy = buffer; + remain = 8; + if (_chm_fetch_bytes(h, buffer, + (UInt64)h->data_offset + + (UInt64)h->rt_unit.start + + (UInt64)h->reset_table.table_offset + + (UInt64)block*8, + remain) != remain || + !_unmarshal_uint64(&dummy, &remain, start)) + return 0; + + /* unpack the end address */ + dummy = buffer; + remain = 8; + if (_chm_fetch_bytes(h, buffer, + (UInt64)h->data_offset + + (UInt64)h->rt_unit.start + + (UInt64)h->reset_table.table_offset + + (UInt64)block*8 + 8, + remain) != remain || + !_unmarshal_int64(&dummy, &remain, len)) + return 0; + } + + /* for the last block, use the span in addition to the reset table */ + else + { + /* unpack the start address */ + dummy = buffer; + remain = 8; + if (_chm_fetch_bytes(h, buffer, + (UInt64)h->data_offset + + (UInt64)h->rt_unit.start + + (UInt64)h->reset_table.table_offset + + (UInt64)block*8, + remain) != remain || + !_unmarshal_uint64(&dummy, &remain, start)) + return 0; + + *len = h->reset_table.compressed_len; + } + + /* compute the length and absolute start address */ + *len -= *start; + *start += h->data_offset + h->cn_unit.start; + + return 1; +} + +/* decompress the block. must have lzx_mutex. */ +static Int64 _chm_decompress_block(struct chmFile *h, + UInt64 block, + UChar **ubuffer) +{ + UChar *cbuffer = malloc(((unsigned int)h->reset_table.block_len + 6144)); + UInt64 cmpStart; /* compressed start */ + Int64 cmpLen; /* compressed len */ + int indexSlot; /* cache index slot */ + UChar *lbuffer; /* local buffer ptr */ + UInt32 blockAlign = (UInt32)(block % h->reset_blkcount); /* reset intvl. aln. */ + UInt32 i; /* local loop index */ + + if (cbuffer == NULL) + return -1; + + /* let the caching system pull its weight! */ + if (block - blockAlign <= h->lzx_last_block && + block >= h->lzx_last_block) + blockAlign = (block - h->lzx_last_block); + + /* check if we need previous blocks */ + if (blockAlign != 0) + { + /* fetch all required previous blocks since last reset */ + for (i = blockAlign; i > 0; i--) + { + UInt32 curBlockIdx = block - i; + + /* check if we most recently decompressed the previous block */ + if (h->lzx_last_block != curBlockIdx) + { + if ((curBlockIdx % h->reset_blkcount) == 0) + { +#ifdef CHM_DEBUG + fprintf(stderr, "***RESET (1)***\n"); +#endif + LZXreset(h->lzx_state); + } + + indexSlot = (int)((curBlockIdx) % h->cache_num_blocks); + if (! h->cache_blocks[indexSlot]) + h->cache_blocks[indexSlot] = (UChar *)malloc((unsigned int)(h->reset_table.block_len)); + if (! h->cache_blocks[indexSlot]) + { + free(cbuffer); + return -1; + } + h->cache_block_indices[indexSlot] = curBlockIdx; + lbuffer = h->cache_blocks[indexSlot]; + + /* decompress the previous block */ +#ifdef CHM_DEBUG + fprintf(stderr, "Decompressing block #%4d (EXTRA)\n", curBlockIdx); +#endif + if (!_chm_get_cmpblock_bounds(h, curBlockIdx, &cmpStart, &cmpLen) || + cmpLen < 0 || + cmpLen > h->reset_table.block_len + 6144 || + _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen || + LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen, + (int)h->reset_table.block_len) != DECR_OK) + { +#ifdef CHM_DEBUG + fprintf(stderr, " (DECOMPRESS FAILED!)\n"); +#endif + free(cbuffer); + return (Int64)0; + } + + h->lzx_last_block = (int)curBlockIdx; + } + } + } + else + { + if ((block % h->reset_blkcount) == 0) + { +#ifdef CHM_DEBUG + fprintf(stderr, "***RESET (2)***\n"); +#endif + LZXreset(h->lzx_state); + } + } + + /* allocate slot in cache */ + indexSlot = (int)(block % h->cache_num_blocks); + if (! h->cache_blocks[indexSlot]) + h->cache_blocks[indexSlot] = (UChar *)malloc(((unsigned int)h->reset_table.block_len)); + if (! h->cache_blocks[indexSlot]) + { + free(cbuffer); + return -1; + } + h->cache_block_indices[indexSlot] = block; + lbuffer = h->cache_blocks[indexSlot]; + *ubuffer = lbuffer; + + /* decompress the block we actually want */ +#ifdef CHM_DEBUG + fprintf(stderr, "Decompressing block #%4d (REAL )\n", block); +#endif + if (! _chm_get_cmpblock_bounds(h, block, &cmpStart, &cmpLen) || + _chm_fetch_bytes(h, cbuffer, cmpStart, cmpLen) != cmpLen || + LZXdecompress(h->lzx_state, cbuffer, lbuffer, (int)cmpLen, + (int)h->reset_table.block_len) != DECR_OK) + { +#ifdef CHM_DEBUG + fprintf(stderr, " (DECOMPRESS FAILED!)\n"); +#endif + free(cbuffer); + return (Int64)0; + } + h->lzx_last_block = (int)block; + + /* XXX: modify LZX routines to return the length of the data they + * decompressed and return that instead, for an extra sanity check. + */ + free(cbuffer); + return h->reset_table.block_len; +} + +/* grab a region from a compressed block */ +static Int64 _chm_decompress_region(struct chmFile *h, + UChar *buf, + UInt64 start, + Int64 len) +{ + UInt64 nBlock, nOffset; + UInt64 nLen; + UInt64 gotLen; + UChar *ubuffer; + + if (len <= 0) + return (Int64)0; + + /* figure out what we need to read */ + nBlock = start / h->reset_table.block_len; + nOffset = start % h->reset_table.block_len; + nLen = len; + if (nLen > (h->reset_table.block_len - nOffset)) + nLen = h->reset_table.block_len - nOffset; + + /* if block is cached, return data from it. */ + CHM_ACQUIRE_LOCK(h->lzx_mutex); + CHM_ACQUIRE_LOCK(h->cache_mutex); + if (h->cache_block_indices[nBlock % h->cache_num_blocks] == nBlock && + h->cache_blocks[nBlock % h->cache_num_blocks] != NULL) + { + memcpy(buf, + h->cache_blocks[nBlock % h->cache_num_blocks] + nOffset, + (unsigned int)nLen); + CHM_RELEASE_LOCK(h->cache_mutex); + CHM_RELEASE_LOCK(h->lzx_mutex); + return nLen; + } + CHM_RELEASE_LOCK(h->cache_mutex); + + /* data request not satisfied, so... start up the decompressor machine */ + if (! h->lzx_state) + { + int window_size = ffs(h->window_size) - 1; + h->lzx_last_block = -1; + h->lzx_state = LZXinit(window_size); + } + + /* decompress some data */ + gotLen = _chm_decompress_block(h, nBlock, &ubuffer); + if (gotLen < nLen) + nLen = gotLen; + memcpy(buf, ubuffer+nOffset, (unsigned int)nLen); + CHM_RELEASE_LOCK(h->lzx_mutex); + return nLen; +} + +/* retrieve (part of) an object */ +LONGINT64 chm_retrieve_object(struct chmFile *h, + const struct chmUnitInfo *ui, + unsigned char *buf, + LONGUINT64 addr, + LONGINT64 len) +{ + /* must be valid file handle */ + if (h == NULL) + return (Int64)0; + + /* starting address must be in correct range */ + if ( addr >= ui->length) + return (Int64)0; + + /* clip length */ + if (addr + len > ui->length) + len = ui->length - addr; + + /* if the file is uncompressed, it's simple */ + if (ui->space == CHM_UNCOMPRESSED) + { + /* read data */ + return _chm_fetch_bytes(h, + buf, + (UInt64)h->data_offset + (UInt64)ui->start + (UInt64)addr, + len); + } + + /* else if the file is compressed, it's a little trickier */ + else /* ui->space == CHM_COMPRESSED */ + { + Int64 swath=0, total=0; + + /* if compression is not enabled for this file... */ + if (! h->compression_enabled) + return total; + + do { + + /* swill another mouthful */ + swath = _chm_decompress_region(h, buf, ui->start + addr, len); + + /* if we didn't get any... */ + if (swath == 0) + return total; + + /* update stats */ + total += swath; + len -= swath; + addr += swath; + buf += swath; + + } while (len != 0); + + return total; + } +} + +/* enumerate the objects in the .chm archive */ +int chm_enumerate(struct chmFile *h, + int what, + CHM_ENUMERATOR e, + void *context) +{ + Int32 curPage; + + /* buffer to hold whatever page we're looking at */ + /* RWE 6/12/2003 */ + UChar *page_buf = malloc((unsigned int)h->block_len); + struct chmPmglHeader header; + UChar *end; + UChar *cur; + unsigned int lenRemain; + UInt64 ui_path_len; + + /* the current ui */ + struct chmUnitInfo ui; + int type_bits = (what & 0x7); + int filter_bits = (what & 0xF8); + + if (page_buf == NULL) + return 0; + + /* starting page */ + curPage = h->index_head; + + /* until we have either returned or given up */ + while (curPage != -1) + { + + /* try to fetch the index page */ + if (_chm_fetch_bytes(h, + page_buf, + (UInt64)h->dir_offset + (UInt64)curPage*h->block_len, + h->block_len) != h->block_len) + { + free(page_buf); + return 0; + } + + /* figure out start and end for this page */ + cur = page_buf; + lenRemain = _CHM_PMGL_LEN; + if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header)) + { + free(page_buf); + return 0; + } + end = page_buf + h->block_len - (header.free_space); + + /* loop over this page */ + while (cur < end) + { + ui.flags = 0; + + if (! _chm_parse_PMGL_entry(&cur, &ui)) + { + free(page_buf); + return 0; + } + + /* get the length of the path */ + ui_path_len = strlen(ui.path)-1; + + /* check for DIRS */ + if (ui.path[ui_path_len] == '/') + ui.flags |= CHM_ENUMERATE_DIRS; + + /* check for FILES */ + if (ui.path[ui_path_len] != '/') + ui.flags |= CHM_ENUMERATE_FILES; + + /* check for NORMAL vs. META */ + if (ui.path[0] == '/') + { + + /* check for NORMAL vs. SPECIAL */ + if (ui.path[1] == '#' || ui.path[1] == '$') + ui.flags |= CHM_ENUMERATE_SPECIAL; + else + ui.flags |= CHM_ENUMERATE_NORMAL; + } + else + ui.flags |= CHM_ENUMERATE_META; + + if (! (type_bits & ui.flags)) + continue; + + if (filter_bits && ! (filter_bits & ui.flags)) + continue; + + /* call the enumerator */ + { + int status = (*e)(h, &ui, context); + switch (status) + { + case CHM_ENUMERATOR_FAILURE: + free(page_buf); + return 0; + case CHM_ENUMERATOR_CONTINUE: + break; + case CHM_ENUMERATOR_SUCCESS: + free(page_buf); + return 1; + default: + break; + } + } + } + + /* advance to next page */ + curPage = header.block_next; + } + + free(page_buf); + return 1; +} + +int chm_enumerate_dir(struct chmFile *h, + const char *prefix, + int what, + CHM_ENUMERATOR e, + void *context) +{ + /* + * XXX: do this efficiently (i.e. using the tree index) + */ + + Int32 curPage; + + /* buffer to hold whatever page we're looking at */ + /* RWE 6/12/2003 */ + UChar *page_buf = malloc((unsigned int)h->block_len); + struct chmPmglHeader header; + UChar *end; + UChar *cur; + unsigned int lenRemain; + + /* set to 1 once we've started */ + int it_has_begun=0; + + /* the current ui */ + struct chmUnitInfo ui; + int type_bits = (what & 0x7); + int filter_bits = (what & 0xF8); + UInt64 ui_path_len; + + /* the length of the prefix */ + char prefixRectified[CHM_MAX_PATHLEN+1]; + int prefixLen; + char lastPath[CHM_MAX_PATHLEN+1]; + int lastPathLen; + + if (page_buf == NULL) + return 0; + + /* starting page */ + curPage = h->index_head; + + /* initialize pathname state */ + strncpy(prefixRectified, prefix, CHM_MAX_PATHLEN); + prefixRectified[CHM_MAX_PATHLEN] = '\0'; + prefixLen = strlen(prefixRectified); + if (prefixLen != 0) + { + if (prefixRectified[prefixLen-1] != '/') + { + prefixRectified[prefixLen] = '/'; + prefixRectified[prefixLen+1] = '\0'; + ++prefixLen; + } + } + lastPath[0] = '\0'; + lastPathLen = -1; + + /* until we have either returned or given up */ + while (curPage != -1) + { + + /* try to fetch the index page */ + if (_chm_fetch_bytes(h, + page_buf, + (UInt64)h->dir_offset + (UInt64)curPage*h->block_len, + h->block_len) != h->block_len) + { + free(page_buf); + return 0; + } + + /* figure out start and end for this page */ + cur = page_buf; + lenRemain = _CHM_PMGL_LEN; + if (! _unmarshal_pmgl_header(&cur, &lenRemain, &header)) + { + free(page_buf); + return 0; + } + end = page_buf + h->block_len - (header.free_space); + + /* loop over this page */ + while (cur < end) + { + ui.flags = 0; + + if (! _chm_parse_PMGL_entry(&cur, &ui)) + { + free(page_buf); + return 0; + } + + /* check if we should start */ + if (! it_has_begun) + { + if (ui.length == 0 && strncasecmp(ui.path, prefixRectified, prefixLen) == 0) + it_has_begun = 1; + else + continue; + + if (ui.path[prefixLen] == '\0') + continue; + } + + /* check if we should stop */ + else + { + if (strncasecmp(ui.path, prefixRectified, prefixLen) != 0) + { + free(page_buf); + return 1; + } + } + + /* check if we should include this path */ + if (lastPathLen != -1) + { + if (strncasecmp(ui.path, lastPath, lastPathLen) == 0) + continue; + } + strncpy(lastPath, ui.path, CHM_MAX_PATHLEN); + lastPath[CHM_MAX_PATHLEN] = '\0'; + lastPathLen = strlen(lastPath); + + /* get the length of the path */ + ui_path_len = strlen(ui.path)-1; + + /* check for DIRS */ + if (ui.path[ui_path_len] == '/') + ui.flags |= CHM_ENUMERATE_DIRS; + + /* check for FILES */ + if (ui.path[ui_path_len] != '/') + ui.flags |= CHM_ENUMERATE_FILES; + + /* check for NORMAL vs. META */ + if (ui.path[0] == '/') + { + + /* check for NORMAL vs. SPECIAL */ + if (ui.path[1] == '#' || ui.path[1] == '$') + ui.flags |= CHM_ENUMERATE_SPECIAL; + else + ui.flags |= CHM_ENUMERATE_NORMAL; + } + else + ui.flags |= CHM_ENUMERATE_META; + + if (! (type_bits & ui.flags)) + continue; + + if (filter_bits && ! (filter_bits & ui.flags)) + continue; + + /* call the enumerator */ + { + int status = (*e)(h, &ui, context); + switch (status) + { + case CHM_ENUMERATOR_FAILURE: + free(page_buf); + return 0; + case CHM_ENUMERATOR_CONTINUE: + break; + case CHM_ENUMERATOR_SUCCESS: + free(page_buf); + return 1; + default: + break; + } + } + } + + /* advance to next page */ + curPage = header.block_next; + } + + free(page_buf); + return 1; +} diff --git a/lib/chmlib/chm_lib.h b/lib/chmlib/chm_lib.h new file mode 100644 index 0000000..2fb87a4 --- /dev/null +++ b/lib/chmlib/chm_lib.h @@ -0,0 +1,144 @@ +/* $Id: chm_lib.h 148 2007-03-07 09:22:36Z gyunaev $ */ +/*************************************************************************** + * chm_lib.h - CHM archive manipulation routines * + * ------------------- * + * * + * author: Jed Wing <[email protected]> * + * version: 0.3 * + * notes: These routines are meant for the manipulation of microsoft * + * .chm (compiled html help) files, but may likely be used * + * for the manipulation of any ITSS archive, if ever ITSS * + * archives are used for any other purpose. * + * * + * Note also that the section names are statically handled. * + * To be entirely correct, the section names should be read * + * from the section names meta-file, and then the various * + * content sections and the "transforms" to apply to the data * + * they contain should be inferred from the section name and * + * the meta-files referenced using that name; however, all of * + * the files I've been able to get my hands on appear to have * + * only two sections: Uncompressed and MSCompressed. * + * Additionally, the ITSS.DLL file included with Windows does * + * not appear to handle any different transforms than the * + * simple LZX-transform. Furthermore, the list of transforms * + * to apply is broken, in that only half the required space * + * is allocated for the list. (It appears as though the * + * space is allocated for ASCII strings, but the strings are * + * written as unicode. As a result, only the first half of * + * the string appears.) So this is probably not too big of * + * a deal, at least until CHM v4 (MS .lit files), which also * + * incorporate encryption, of some description. * + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU Lesser General Public License as * + * published by the Free Software Foundation; either version 2.1 of the * + * License, or (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef INCLUDED_CHMLIB_H +#define INCLUDED_CHMLIB_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* RWE 6/12/1002 */ +#ifdef PPC_BSTR +#include <wtypes.h> +#endif + +#ifdef WIN32 +#ifdef __MINGW32__ +#define __int64 long long +#endif +typedef unsigned __int64 LONGUINT64; +typedef __int64 LONGINT64; +#else +typedef unsigned long long LONGUINT64; +typedef long long LONGINT64; +#endif + +/* the two available spaces in a CHM file */ +/* N.B.: The format supports arbitrarily many spaces, but only */ +/* two appear to be used at present. */ +#define CHM_UNCOMPRESSED (0) +#define CHM_COMPRESSED (1) + +/* structure representing an ITS (CHM) file stream */ +struct chmFile; + +/* structure representing an element from an ITS file stream */ +#define CHM_MAX_PATHLEN (512) +struct chmUnitInfo +{ + LONGUINT64 start; + LONGUINT64 length; + int space; + int flags; + char path[CHM_MAX_PATHLEN+1]; +}; + +/* open an ITS archive */ +#ifdef PPC_BSTR +/* RWE 6/12/2003 */ +struct chmFile* chm_open(BSTR filename); +#else +struct chmFile* chm_open(const char *filename); +#endif + +/* close an ITS archive */ +void chm_close(struct chmFile *h); + +/* methods for ssetting tuning parameters for particular file */ +#define CHM_PARAM_MAX_BLOCKS_CACHED 0 +void chm_set_param(struct chmFile *h, + int paramType, + int paramVal); + +/* resolve a particular object from the archive */ +#define CHM_RESOLVE_SUCCESS (0) +#define CHM_RESOLVE_FAILURE (1) +int chm_resolve_object(struct chmFile *h, + const char *objPath, + struct chmUnitInfo *ui); + +/* retrieve part of an object from the archive */ +LONGINT64 chm_retrieve_object(struct chmFile *h, + const struct chmUnitInfo *ui, + unsigned char *buf, + LONGUINT64 addr, + LONGINT64 len); + +/* enumerate the objects in the .chm archive */ +typedef int (*CHM_ENUMERATOR)(struct chmFile *h, + struct chmUnitInfo *ui, + void *context); +#define CHM_ENUMERATE_NORMAL (1) +#define CHM_ENUMERATE_META (2) +#define CHM_ENUMERATE_SPECIAL (4) +#define CHM_ENUMERATE_FILES (8) +#define CHM_ENUMERATE_DIRS (16) +#define CHM_ENUMERATE_ALL (31) +#define CHM_ENUMERATOR_FAILURE (0) +#define CHM_ENUMERATOR_CONTINUE (1) +#define CHM_ENUMERATOR_SUCCESS (2) +int chm_enumerate(struct chmFile *h, + int what, + CHM_ENUMERATOR e, + void *context); + +int chm_enumerate_dir(struct chmFile *h, + const char *prefix, + int what, + CHM_ENUMERATOR e, + void *context); + +#ifdef __cplusplus +} +#endif + +#endif /* INCLUDED_CHMLIB_H */ diff --git a/lib/chmlib/lzx.c b/lib/chmlib/lzx.c new file mode 100644 index 0000000..7a5545b --- /dev/null +++ b/lib/chmlib/lzx.c @@ -0,0 +1,812 @@ +/* $Id: lzx.c 148 2007-03-07 09:22:36Z gyunaev $ */ +/*************************************************************************** + * lzx.c - LZX decompression routines * + * ------------------- * + * * + * maintainer: Jed Wing <[email protected]> * + * source: modified lzx.c from cabextract v0.5 * + * notes: This file was taken from cabextract v0.5, which was, * + * itself, a modified version of the lzx decompression code * + * from unlzx. * + * * + * platforms: In its current incarnation, this file has been tested on * + * two different Linux platforms (one, redhat-based, with a * + * 2.1.2 glibc and gcc 2.95.x, and the other, Debian, with * + * 2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2). Both were * + * Intel x86 compatible machines. * + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. Note that an exemption to this * + * license has been granted by Stuart Caie for the purposes of * + * distribution with chmlib. This does not, to the best of my * + * knowledge, constitute a change in the license of this (the LZX) code * + * in general. * + * * + ***************************************************************************/ + +#include "lzx.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef __GNUC__ +#define memcpy __builtin_memcpy +#endif + +/* sized types */ +typedef unsigned char UBYTE; /* 8 bits exactly */ +typedef unsigned short UWORD; /* 16 bits (or more) */ +typedef unsigned int ULONG; /* 32 bits (or more) */ +typedef signed int LONG; /* 32 bits (or more) */ + +/* some constants defined by the LZX specification */ +#define LZX_MIN_MATCH (2) +#define LZX_MAX_MATCH (257) +#define LZX_NUM_CHARS (256) +#define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */ +#define LZX_BLOCKTYPE_VERBATIM (1) +#define LZX_BLOCKTYPE_ALIGNED (2) +#define LZX_BLOCKTYPE_UNCOMPRESSED (3) +#define LZX_PRETREE_NUM_ELEMENTS (20) +#define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */ +#define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */ +#define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */ + +/* LZX huffman defines: tweak tablebits as desired */ +#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS) +#define LZX_PRETREE_TABLEBITS (6) +#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) +#define LZX_MAINTREE_TABLEBITS (12) +#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1) +#define LZX_LENGTH_TABLEBITS (12) +#define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS) +#define LZX_ALIGNED_TABLEBITS (7) + +#define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */ + +#define LZX_DECLARE_TABLE(tbl) \ + UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\ + UBYTE tbl##_len [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY] + +struct LZXstate +{ + UBYTE *window; /* the actual decoding window */ + ULONG window_size; /* window size (32Kb through 2Mb) */ + ULONG actual_size; /* window size when it was first allocated */ + ULONG window_posn; /* current offset within the window */ + ULONG R0, R1, R2; /* for the LRU offset system */ + UWORD main_elements; /* number of main tree elements */ + int header_read; /* have we started decoding at all yet? */ + UWORD block_type; /* type of this block */ + ULONG block_length; /* uncompressed length of this block */ + ULONG block_remaining; /* uncompressed bytes still left to decode */ + ULONG frames_read; /* the number of CFDATA blocks processed */ + LONG intel_filesize; /* magic header value used for transform */ + LONG intel_curpos; /* current offset in transform space */ + int intel_started; /* have we seen any translatable data yet? */ + + LZX_DECLARE_TABLE(PRETREE); + LZX_DECLARE_TABLE(MAINTREE); + LZX_DECLARE_TABLE(LENGTH); + LZX_DECLARE_TABLE(ALIGNED); +}; + +/* LZX decruncher */ + +/* Microsoft's LZX document and their implementation of the + * com.ms.util.cab Java package do not concur. + * + * In the LZX document, there is a table showing the correlation between + * window size and the number of position slots. It states that the 1MB + * window = 40 slots and the 2MB window = 42 slots. In the implementation, + * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the + * first slot whose position base is equal to or more than the required + * window size'. This would explain why other tables in the document refer + * to 50 slots rather than 42. + * + * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode + * is not defined in the specification. + * + * The LZX document does not state the uncompressed block has an + * uncompressed length field. Where does this length field come from, so + * we can know how large the block is? The implementation has it as the 24 + * bits following after the 3 blocktype bits, before the alignment + * padding. + * + * The LZX document states that aligned offset blocks have their aligned + * offset huffman tree AFTER the main and length trees. The implementation + * suggests that the aligned offset tree is BEFORE the main and length + * trees. + * + * The LZX document decoding algorithm states that, in an aligned offset + * block, if an extra_bits value is 1, 2 or 3, then that number of bits + * should be read and the result added to the match offset. This is + * correct for 1 and 2, but not 3, where just a huffman symbol (using the + * aligned tree) should be read. + * + * Regarding the E8 preprocessing, the LZX document states 'No translation + * may be performed on the last 6 bytes of the input block'. This is + * correct. However, the pseudocode provided checks for the *E8 leader* + * up to the last 6 bytes. If the leader appears between -10 and -7 bytes + * from the end, this would cause the next four bytes to be modified, at + * least one of which would be in the last 6 bytes, which is not allowed + * according to the spec. + * + * The specification states that the huffman trees must always contain at + * least one element. However, many CAB files contain blocks where the + * length tree is completely empty (because there are no matches), and + * this is expected to succeed. + */ + + +/* LZX uses what it calls 'position slots' to represent match offsets. + * What this means is that a small 'position slot' number and a small + * offset from that slot are encoded instead of one large offset for + * every match. + * - position_base is an index to the position slot bases + * - extra_bits states how many bits of offset-from-base data is needed. + */ +static const UBYTE extra_bits[51] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, + 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17 +}; + +static const ULONG position_base[51] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, + 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, + 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, + 1835008, 1966080, 2097152 +}; + +struct LZXstate *LZXinit(int window) +{ + struct LZXstate *pState=NULL; + ULONG wndsize = 1 << window; + int i, posn_slots; + + /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */ + /* if a previously allocated window is big enough, keep it */ + if (window < 15 || window > 21) return NULL; + + /* allocate state and associated window */ + pState = (struct LZXstate *)malloc(sizeof(struct LZXstate)); + if (!(pState->window = (UBYTE *)malloc(wndsize))) + { + free(pState); + return NULL; + } + pState->actual_size = wndsize; + pState->window_size = wndsize; + + /* calculate required position slots */ + if (window == 20) posn_slots = 42; + else if (window == 21) posn_slots = 50; + else posn_slots = window << 1; + + /** alternatively **/ + /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */ + + /* initialize other state */ + pState->R0 = pState->R1 = pState->R2 = 1; + pState->main_elements = LZX_NUM_CHARS + (posn_slots << 3); + pState->header_read = 0; + pState->frames_read = 0; + pState->block_remaining = 0; + pState->block_type = LZX_BLOCKTYPE_INVALID; + pState->intel_curpos = 0; + pState->intel_started = 0; + pState->window_posn = 0; + + /* initialise tables to 0 (because deltas will be applied to them) */ + for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0; + for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) pState->LENGTH_len[i] = 0; + + return pState; +} + +void LZXteardown(struct LZXstate *pState) +{ + if (pState) + { + if (pState->window) + free(pState->window); + free(pState); + } +} + +int LZXreset(struct LZXstate *pState) +{ + int i; + + pState->R0 = pState->R1 = pState->R2 = 1; + pState->header_read = 0; + pState->frames_read = 0; + pState->block_remaining = 0; + pState->block_type = LZX_BLOCKTYPE_INVALID; + pState->intel_curpos = 0; + pState->intel_started = 0; + pState->window_posn = 0; + + for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0; + for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->LENGTH_len[i] = 0; + + return DECR_OK; +} + + +/* Bitstream reading macros: + * + * INIT_BITSTREAM should be used first to set up the system + * READ_BITS(var,n) takes N bits from the buffer and puts them in var + * + * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer + * PEEK_BITS(n) extracts (without removing) N bits from the bit buffer + * REMOVE_BITS(n) removes N bits from the bit buffer + * + * These bit access routines work by using the area beyond the MSB and the + * LSB as a free source of zeroes. This avoids having to mask any bits. + * So we have to know the bit width of the bitbuffer variable. This is + * sizeof(ULONG) * 8, also defined as ULONG_BITS + */ + +/* number of bits in ULONG. Note: This must be at multiple of 16, and at + * least 32 for the bitbuffer code to work (ie, it must be able to ensure + * up to 17 bits - that's adding 16 bits when there's one bit left, or + * adding 32 bits when there are no bits left. The code should work fine + * for machines where ULONG >= 32 bits. + */ +#define ULONG_BITS (sizeof(ULONG)<<3) + +#define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0) + +#define ENSURE_BITS(n) \ + while (bitsleft < (n)) { \ + bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft); \ + bitsleft += 16; inpos+=2; \ + } + +#define PEEK_BITS(n) (bitbuf >> (ULONG_BITS - (n))) +#define REMOVE_BITS(n) ((bitbuf <<= (n)), (bitsleft -= (n))) + +#define READ_BITS(v,n) do { \ + ENSURE_BITS(n); \ + (v) = PEEK_BITS(n); \ + REMOVE_BITS(n); \ +} while (0) + + +/* Huffman macros */ + +#define TABLEBITS(tbl) (LZX_##tbl##_TABLEBITS) +#define MAXSYMBOLS(tbl) (LZX_##tbl##_MAXSYMBOLS) +#define SYMTABLE(tbl) (pState->tbl##_table) +#define LENTABLE(tbl) (pState->tbl##_len) + +/* BUILD_TABLE(tablename) builds a huffman lookup table from code lengths. + * In reality, it just calls make_decode_table() with the appropriate + * values - they're all fixed by some #defines anyway, so there's no point + * writing each call out in full by hand. + */ +#define BUILD_TABLE(tbl) \ + if (make_decode_table( \ + MAXSYMBOLS(tbl), TABLEBITS(tbl), LENTABLE(tbl), SYMTABLE(tbl) \ + )) { return DECR_ILLEGALDATA; } + + +/* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the + * bitstream using the stated table and puts it in var. + */ +#define READ_HUFFSYM(tbl,var) do { \ + ENSURE_BITS(16); \ + hufftbl = SYMTABLE(tbl); \ + if ((i = hufftbl[PEEK_BITS(TABLEBITS(tbl))]) >= MAXSYMBOLS(tbl)) { \ + j = 1 << (ULONG_BITS - TABLEBITS(tbl)); \ + do { \ + j >>= 1; i <<= 1; i |= (bitbuf & j) ? 1 : 0; \ + if (!j) { return DECR_ILLEGALDATA; } \ + } while ((i = hufftbl[i]) >= MAXSYMBOLS(tbl)); \ + } \ + j = LENTABLE(tbl)[(var) = i]; \ + REMOVE_BITS(j); \ +} while (0) + + +/* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols + * first to last in the given table. The code lengths are stored in their + * own special LZX way. + */ +#define READ_LENGTHS(tbl,first,last) do { \ + lb.bb = bitbuf; lb.bl = bitsleft; lb.ip = inpos; \ + if (lzx_read_lens(pState, LENTABLE(tbl),(first),(last),&lb)) { \ + return DECR_ILLEGALDATA; \ + } \ + bitbuf = lb.bb; bitsleft = lb.bl; inpos = lb.ip; \ +} while (0) + + +/* make_decode_table(nsyms, nbits, length[], table[]) + * + * This function was coded by David Tritscher. It builds a fast huffman + * decoding table out of just a canonical huffman code lengths table. + * + * nsyms = total number of symbols in this huffman tree. + * nbits = any symbols with a code length of nbits or less can be decoded + * in one lookup of the table. + * length = A table to get code lengths from [0 to syms-1] + * table = The table to fill up with decoded symbols and pointers. + * + * Returns 0 for OK or 1 for error + */ + +static int make_decode_table(ULONG nsyms, ULONG nbits, UBYTE *length, UWORD *table) { + register UWORD sym; + register ULONG leaf; + register UBYTE bit_num = 1; + ULONG fill; + ULONG pos = 0; /* the current position in the decode table */ + ULONG table_mask = 1 << nbits; + ULONG bit_mask = table_mask >> 1; /* don't do 0 length codes */ + ULONG next_symbol = bit_mask; /* base of allocation for long codes */ + + /* fill entries for codes short enough for a direct mapping */ + while (bit_num <= nbits) { + for (sym = 0; sym < nsyms; sym++) { + if (length[sym] == bit_num) { + leaf = pos; + + if((pos += bit_mask) > table_mask) return 1; /* table overrun */ + + /* fill all possible lookups of this symbol with the symbol itself */ + fill = bit_mask; + while (fill-- > 0) table[leaf++] = sym; + } + } + bit_mask >>= 1; + bit_num++; + } + + /* if there are any codes longer than nbits */ + if (pos != table_mask) { + /* clear the remainder of the table */ + for (sym = pos; sym < table_mask; sym++) table[sym] = 0; + + /* give ourselves room for codes to grow by up to 16 more bits */ + pos <<= 16; + table_mask <<= 16; + bit_mask = 1 << 15; + + while (bit_num <= 16) { + for (sym = 0; sym < nsyms; sym++) { + if (length[sym] == bit_num) { + leaf = pos >> 16; + for (fill = 0; fill < bit_num - nbits; fill++) { + /* if this path hasn't been taken yet, 'allocate' two entries */ + if (table[leaf] == 0) { + table[(next_symbol << 1)] = 0; + table[(next_symbol << 1) + 1] = 0; + table[leaf] = next_symbol++; + } + /* follow the path and select either left or right for next bit */ + leaf = table[leaf] << 1; + if ((pos >> (15-fill)) & 1) leaf++; + } + table[leaf] = sym; + + if ((pos += bit_mask) > table_mask) return 1; /* table overflow */ + } + } + bit_mask >>= 1; + bit_num++; + } + } + + /* full table? */ + if (pos == table_mask) return 0; + + /* either erroneous table, or all elements are 0 - let's find out. */ + for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1; + return 0; +} + +struct lzx_bits { + ULONG bb; + int bl; + UBYTE *ip; +}; + +static int lzx_read_lens(struct LZXstate *pState, UBYTE *lens, ULONG first, ULONG last, struct lzx_bits *lb) { + ULONG i,j, x,y; + int z; + + register ULONG bitbuf = lb->bb; + register int bitsleft = lb->bl; + UBYTE *inpos = lb->ip; + UWORD *hufftbl; + + for (x = 0; x < 20; x++) { + READ_BITS(y, 4); + LENTABLE(PRETREE)[x] = y; + } + BUILD_TABLE(PRETREE); + + for (x = first; x < last; ) { + READ_HUFFSYM(PRETREE, z); + if (z == 17) { + READ_BITS(y, 4); y += 4; + while (y--) lens[x++] = 0; + } + else if (z == 18) { + READ_BITS(y, 5); y += 20; + while (y--) lens[x++] = 0; + } + else if (z == 19) { + READ_BITS(y, 1); y += 4; + READ_HUFFSYM(PRETREE, z); + z = lens[x] - z; if (z < 0) z += 17; + while (y--) lens[x++] = z; + } + else { + z = lens[x] - z; if (z < 0) z += 17; + lens[x++] = z; + } + } + + lb->bb = bitbuf; + lb->bl = bitsleft; + lb->ip = inpos; + return 0; +} + +int LZXdecompress(struct LZXstate *pState, unsigned char *inpos, unsigned char *outpos, int inlen, int outlen) { + UBYTE *endinp = inpos + inlen; + UBYTE *window = pState->window; + UBYTE *runsrc, *rundest; + UWORD *hufftbl; /* used in READ_HUFFSYM macro as chosen decoding table */ + + ULONG window_posn = pState->window_posn; + ULONG window_size = pState->window_size; + ULONG R0 = pState->R0; + ULONG R1 = pState->R1; + ULONG R2 = pState->R2; + + register ULONG bitbuf; + register int bitsleft; + ULONG match_offset, i,j,k; /* ijk used in READ_HUFFSYM macro */ + struct lzx_bits lb; /* used in READ_LENGTHS macro */ + + int togo = outlen, this_run, main_element, aligned_bits; + int match_length, length_footer, extra, verbatim_bits; + + INIT_BITSTREAM; + + /* read header if necessary */ + if (!pState->header_read) { + i = j = 0; + READ_BITS(k, 1); if (k) { READ_BITS(i,16); READ_BITS(j,16); } + pState->intel_filesize = (i << 16) | j; /* or 0 if not encoded */ + pState->header_read = 1; + } + + /* main decoding loop */ + while (togo > 0) { + /* last block finished, new block expected */ + if (pState->block_remaining == 0) { + if (pState->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) { + if (pState->block_length & 1) inpos++; /* realign bitstream to word */ + INIT_BITSTREAM; + } + + READ_BITS(pState->block_type, 3); + READ_BITS(i, 16); + READ_BITS(j, 8); + pState->block_remaining = pState->block_length = (i << 8) | j; + + switch (pState->block_type) { + case LZX_BLOCKTYPE_ALIGNED: + for (i = 0; i < 8; i++) { READ_BITS(j, 3); LENTABLE(ALIGNED)[i] = j; } + BUILD_TABLE(ALIGNED); + /* rest of aligned header is same as verbatim */ + + case LZX_BLOCKTYPE_VERBATIM: + READ_LENGTHS(MAINTREE, 0, 256); + READ_LENGTHS(MAINTREE, 256, pState->main_elements); + BUILD_TABLE(MAINTREE); + if (LENTABLE(MAINTREE)[0xE8] != 0) pState->intel_started = 1; + + READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); + BUILD_TABLE(LENGTH); + break; + + case LZX_BLOCKTYPE_UNCOMPRESSED: + pState->intel_started = 1; /* because we can't assume otherwise */ + ENSURE_BITS(16); /* get up to 16 pad bits into the buffer */ + if (bitsleft > 16) inpos -= 2; /* and align the bitstream! */ + R0 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; + R1 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; + R2 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; + break; + + default: + return DECR_ILLEGALDATA; + } + } + + /* buffer exhaustion check */ + if (inpos > endinp) { + /* it's possible to have a file where the next run is less than + * 16 bits in size. In this case, the READ_HUFFSYM() macro used + * in building the tables will exhaust the buffer, so we should + * allow for this, but not allow those accidentally read bits to + * be used (so we check that there are at least 16 bits + * remaining - in this boundary case they aren't really part of + * the compressed data) + */ + if (inpos > (endinp+2) || bitsleft < 16) return DECR_ILLEGALDATA; + } + + while ((this_run = pState->block_remaining) > 0 && togo > 0) { + if (this_run > togo) this_run = togo; + togo -= this_run; + pState->block_remaining -= this_run; + + /* apply 2^x-1 mask */ + window_posn &= window_size - 1; + /* runs can't straddle the window wraparound */ + if ((window_posn + this_run) > window_size) + return DECR_DATAFORMAT; + + switch (pState->block_type) { + + case LZX_BLOCKTYPE_VERBATIM: + while (this_run > 0) { + READ_HUFFSYM(MAINTREE, main_element); + + if (main_element < LZX_NUM_CHARS) { + /* literal: 0 to LZX_NUM_CHARS-1 */ + window[window_posn++] = main_element; + this_run--; + } + else { + /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ + main_element -= LZX_NUM_CHARS; + + match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; + if (match_length == LZX_NUM_PRIMARY_LENGTHS) { + READ_HUFFSYM(LENGTH, length_footer); + match_length += length_footer; + } + match_length += LZX_MIN_MATCH; + + match_offset = main_element >> 3; + + if (match_offset > 2) { + /* not repeated offset */ + if (match_offset != 3) { + extra = extra_bits[match_offset]; + READ_BITS(verbatim_bits, extra); + match_offset = position_base[match_offset] - 2 + verbatim_bits; + } + else { + match_offset = 1; + } + + /* update repeated offset LRU queue */ + R2 = R1; R1 = R0; R0 = match_offset; + } + else if (match_offset == 0) { + match_offset = R0; + } + else if (match_offset == 1) { + match_offset = R1; + R1 = R0; R0 = match_offset; + } + else /* match_offset == 2 */ { + match_offset = R2; + R2 = R0; R0 = match_offset; + } + + rundest = window + window_posn; + runsrc = rundest - match_offset; + window_posn += match_length; + if (window_posn > window_size) return DECR_ILLEGALDATA; + this_run -= match_length; + + /* copy any wrapped around source data */ + while ((runsrc < window) && (match_length-- > 0)) { + *rundest++ = *(runsrc + window_size); runsrc++; + } + /* copy match data - no worries about destination wraps */ + while (match_length-- > 0) *rundest++ = *runsrc++; + + } + } + break; + + case LZX_BLOCKTYPE_ALIGNED: + while (this_run > 0) { + READ_HUFFSYM(MAINTREE, main_element); + + if (main_element < LZX_NUM_CHARS) { + /* literal: 0 to LZX_NUM_CHARS-1 */ + window[window_posn++] = main_element; + this_run--; + } + else { + /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ + main_element -= LZX_NUM_CHARS; + + match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; + if (match_length == LZX_NUM_PRIMARY_LENGTHS) { + READ_HUFFSYM(LENGTH, length_footer); + match_length += length_footer; + } + match_length += LZX_MIN_MATCH; + + match_offset = main_element >> 3; + + if (match_offset > 2) { + /* not repeated offset */ + extra = extra_bits[match_offset]; + match_offset = position_base[match_offset] - 2; + if (extra > 3) { + /* verbatim and aligned bits */ + extra -= 3; + READ_BITS(verbatim_bits, extra); + match_offset += (verbatim_bits << 3); + READ_HUFFSYM(ALIGNED, aligned_bits); + match_offset += aligned_bits; + } + else if (extra == 3) { + /* aligned bits only */ + READ_HUFFSYM(ALIGNED, aligned_bits); + match_offset += aligned_bits; + } + else if (extra > 0) { /* extra==1, extra==2 */ + /* verbatim bits only */ + READ_BITS(verbatim_bits, extra); + match_offset += verbatim_bits; + } + else /* extra == 0 */ { + /* ??? */ + match_offset = 1; + } + + /* update repeated offset LRU queue */ + R2 = R1; R1 = R0; R0 = match_offset; + } + else if (match_offset == 0) { + match_offset = R0; + } + else if (match_offset == 1) { + match_offset = R1; + R1 = R0; R0 = match_offset; + } + else /* match_offset == 2 */ { + match_offset = R2; + R2 = R0; R0 = match_offset; + } + + rundest = window + window_posn; + runsrc = rundest - match_offset; + window_posn += match_length; + if (window_posn > window_size) return DECR_ILLEGALDATA; + this_run -= match_length; + + /* copy any wrapped around source data */ + while ((runsrc < window) && (match_length-- > 0)) { + *rundest++ = *(runsrc + window_size); runsrc++; + } + /* copy match data - no worries about destination wraps */ + while (match_length-- > 0) *rundest++ = *runsrc++; + + } + } + break; + + case LZX_BLOCKTYPE_UNCOMPRESSED: + if ((inpos + this_run) > endinp) return DECR_ILLEGALDATA; + memcpy(window + window_posn, inpos, (size_t) this_run); + inpos += this_run; window_posn += this_run; + break; + + default: + return DECR_ILLEGALDATA; /* might as well */ + } + + } + } + + if (togo != 0) return DECR_ILLEGALDATA; + memcpy(outpos, window + ((!window_posn) ? window_size : window_posn) - outlen, (size_t) outlen); + + pState->window_posn = window_posn; + pState->R0 = R0; + pState->R1 = R1; + pState->R2 = R2; + + /* intel E8 decoding */ + if ((pState->frames_read++ < 32768) && pState->intel_filesize != 0) { + if (outlen <= 6 || !pState->intel_started) { + pState->intel_curpos += outlen; + } + else { + UBYTE *data = outpos; + UBYTE *dataend = data + outlen - 10; + LONG curpos = pState->intel_curpos; + LONG filesize = pState->intel_filesize; + LONG abs_off, rel_off; + + pState->intel_curpos = curpos + outlen; + + while (data < dataend) { + if (*data++ != 0xE8) { curpos++; continue; } + abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); + if ((abs_off >= -curpos) && (abs_off < filesize)) { + rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize; + data[0] = (UBYTE) rel_off; + data[1] = (UBYTE) (rel_off >> 8); + data[2] = (UBYTE) (rel_off >> 16); + data[3] = (UBYTE) (rel_off >> 24); + } + data += 4; + curpos += 5; + } + } + } + return DECR_OK; +} + +#ifdef LZX_CHM_TESTDRIVER +int main(int c, char **v) +{ + FILE *fin, *fout; + struct LZXstate state; + UBYTE ibuf[16384]; + UBYTE obuf[32768]; + int ilen, olen; + int status; + int i; + int count=0; + int w = atoi(v[1]); + LZXinit(&state, w); + fout = fopen(v[2], "wb"); + for (i=3; i<c; i++) + { + fin = fopen(v[i], "rb"); + ilen = fread(ibuf, 1, 16384, fin); + status = LZXdecompress(&state, ibuf, obuf, ilen, 32768); + switch (status) + { + case DECR_OK: + printf("ok\n"); + fwrite(obuf, 1, 32768, fout); + break; + case DECR_DATAFORMAT: + printf("bad format\n"); + break; + case DECR_ILLEGALDATA: + printf("illegal data\n"); + break; + case DECR_NOMEMORY: + printf("no memory\n"); + break; + default: + break; + } + fclose(fin); + if (++count == 2) + { + count = 0; + LZXreset(&state); + } + } + fclose(fout); +} +#endif diff --git a/lib/chmlib/lzx.h b/lib/chmlib/lzx.h new file mode 100644 index 0000000..26d71a8 --- /dev/null +++ b/lib/chmlib/lzx.h @@ -0,0 +1,62 @@ +/* $Id: lzx.h 148 2007-03-07 09:22:36Z gyunaev $ */ +/*************************************************************************** + * lzx.h - LZX decompression routines * + * ------------------- * + * * + * maintainer: Jed Wing <[email protected]> * + * source: modified lzx.c from cabextract v0.5 * + * notes: This file was taken from cabextract v0.5, which was, * + * itself, a modified version of the lzx decompression code * + * from unlzx. * + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. Note that an exemption to this * + * license has been granted by Stuart Caie for the purposes of * + * distribution with chmlib. This does not, to the best of my * + * knowledge, constitute a change in the license of this (the LZX) code * + * in general. * + * * + ***************************************************************************/ + +#ifndef INCLUDED_LZX_H +#define INCLUDED_LZX_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* return codes */ +#define DECR_OK (0) +#define DECR_DATAFORMAT (1) +#define DECR_ILLEGALDATA (2) +#define DECR_NOMEMORY (3) + +/* opaque state structure */ +struct LZXstate; + +/* create an lzx state object */ +struct LZXstate *LZXinit(int window); + +/* destroy an lzx state object */ +void LZXteardown(struct LZXstate *pState); + +/* reset an lzx stream */ +int LZXreset(struct LZXstate *pState); + +/* decompress an LZX compressed block */ +int LZXdecompress(struct LZXstate *pState, + unsigned char *inpos, + unsigned char *outpos, + int inlen, + int outlen); + +#ifdef __cplusplus +} +#endif + +#endif /* INCLUDED_LZX_H */ |