1 files changed, 451 insertions, 0 deletions
diff --git a/kviewshell/plugins/djvu/libdjvu/DjVmDir.h b/kviewshell/plugins/djvu/libdjvu/DjVmDir.h
new file mode 100644
index 00000000..86b661e3
--- /dev/null
+++ b/kviewshell/plugins/djvu/libdjvu/DjVmDir.h
@@ -0,0 +1,451 @@
+//C-  -*- C++ -*-
+//C- -------------------------------------------------------------------
+//C- DjVuLibre-3.5
+//C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
+//C- Copyright (c) 2001  AT&T
+//C-
+//C- This software is subject to, and may be distributed under, the
+//C- GNU General Public License, Version 2. The license should have
+//C- accompanied the software or you may obtain a copy of the license
+//C- from the Free Software Foundation at http://www.fsf.org .
+//C-
+//C- This program is distributed in the hope that it will be useful,
+//C- but WITHOUT ANY WARRANTY; without even the implied warranty of
+//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//C- GNU General Public License for more details.
+//C- 
+//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
+//C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech 
+//C- Software authorized us to replace the original DjVu(r) Reference 
+//C- Library notice by the following text (see doc/lizard2002.djvu):
+//C-
+//C-  ------------------------------------------------------------------
+//C- | DjVu (r) Reference Library (v. 3.5)
+//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
+//C- | The DjVu Reference Library is protected by U.S. Pat. No.
+//C- | 6,058,214 and patents pending.
+//C- |
+//C- | This software is subject to, and may be distributed under, the
+//C- | GNU General Public License, Version 2. The license should have
+//C- | accompanied the software or you may obtain a copy of the license
+//C- | from the Free Software Foundation at http://www.fsf.org .
+//C- |
+//C- | The computer code originally released by LizardTech under this
+//C- | license and unmodified by other parties is deemed "the LIZARDTECH
+//C- | ORIGINAL CODE."  Subject to any third party intellectual property
+//C- | claims, LizardTech grants recipient a worldwide, royalty-free, 
+//C- | non-exclusive license to make, use, sell, or otherwise dispose of 
+//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the 
+//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU 
+//C- | General Public License.   This grant only confers the right to 
+//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to 
+//C- | the extent such infringement is reasonably necessary to enable 
+//C- | recipient to make, have made, practice, sell, or otherwise dispose 
+//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to 
+//C- | any greater extent that may be necessary to utilize further 
+//C- | modifications or combinations.
+//C- |
+//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
+//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
+//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+//C- +------------------------------------------------------------------
+// 
+// $Id: DjVmDir.h,v 1.10 2003/11/07 22:08:20 leonb Exp $
+// $Name: release_3_5_15 $
+
+#ifndef _DJVMDIR_H
+#define _DJVMDIR_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#if NEED_GNUG_PRAGMAS
+# pragma interface
+#endif
+
+
+/** @name DjVmDir.h
+    Files #"DjVmDir.h"# and #"DjVmDir.cpp"# implement class \Ref{DjVmDir} for
+    representing the directory of a DjVu multipage document.
+
+    {\bf Bundled vs. Indirect format} --- There are currently two multipage
+    DjVu formats supported: {\em bundled} and {\em indirect}.  In the first
+    format all component files composing a given document are packaged (or
+    bundled) into one file, in the second one every page and component is
+    stored in a separate file and there is one more file, which contains the
+    list of all others.
+
+    {\bf Multipage DjVu format} --- Multipage DjVu documents follow the EA
+    IFF85 format (cf. \Ref{IFFByteStream.h}.)  A document is composed of a
+    #"FORM:DJVM"# whose first chunk is a #"DIRM"# chunk containing the {\em
+    document directory}.  This directory lists all component files composing
+    the given document, helps to access every component file and identify the
+    pages of the document.
+    \begin{itemize} 
+    \item In a {\em bundled} multipage file, the component files
+         are stored immediately after the #"DIRM"# chunk,
+         within the #"FORM:DJVM"# composite chunk.
+    \item In an {\em indirect} multipage file, the component files are 
+          stored in different files whose URLs are composed using information 
+          stored in the #"DIRM"# chunk.
+    \end{itemize} 
+    Most of the component files represent pages of a document.  Some files
+    however represent data shared by several pages.  The pages refer to these
+    supporting files by means of an inclusion chunk (#"INCL"# chunks)
+    identifying the supporting file.
+
+    {\bf Document Directory} --- Every directory record describes a component
+    file.  Each component file is identified by a small string named the
+    identifier (ID).  Each component file also contains a file name and a
+    title.  The format of the #"DIRM"# chunk is described in section
+    \Ref{Format of the DIRM chunk.}.
+
+    Theoretically, IDs are used to uniquely identify each component file in
+    #"INCL"# chunks, names are used to compose the URLs of the component
+    files in an indirect multipage DjVu file, and titles are cosmetic names
+    possibly displayed when viewing a page of a document.  There are however
+    many problems with this scheme, and with the current implementation we
+    {\em strongly suggest} always making the file ID, the file name and the
+    file title identical.
+
+    @memo Implements DjVu multipage document directory
+    @author Andrei Erofeev <[email protected]>
+    @version
+    #$Id: DjVmDir.h,v 1.10 2003/11/07 22:08:20 leonb Exp $# */
+//@{
+
+
+
+#include "GString.h"
+#include "GThreads.h"
+
+#ifdef HAVE_NAMESPACES
+namespace DJVU {
+# ifdef NOT_DEFINED // Just to fool emacs c++ mode
+}
+#endif
+#endif
+
+class ByteStream;
+
+/** Implements DjVu multipage document directory.  There are currently two
+    multipage DjVu formats supported: {\em bundled} and {\em indirect}.  In
+    the first format all component files composing a given document are
+    packaged (or bundled) into one file, in the second one every page and
+    component is stored in a separate file and there is one more file, which
+    contains the list of all others.
+
+    The multipage document directory lists all component files composing the
+    given document, helps to access every file, identify pages and maintain
+    user-specified shortcuts.  Every directory record describes a file
+    composing the document.  Each file is identified by a small string named
+    the identifier (ID).  Each file may also contain a file name and a title.
+
+    The #DjVmDir# class represents a multipage document directory.  Its main
+    purpose is to encode and decode the document directory when writing or
+    reading the #DIRM# chunk.  Normally you don't have to create this class
+    yourself. It's done automatically when \Ref{DjVmDoc} class initializes
+    itself. It may be useful though to be able to access records in the
+    directory because some classes (like \Ref{DjVuDocument} and \Ref{DjVmDoc})
+    return a pointer to #DjVmDir# in some cases. */
+
+class DjVmDir : public GPEnabled
+{
+protected:
+      /** Default constructor.  It is protected; \Ref{create} is the public
+          way to obtain an instance. */
+   DjVmDir() {}
+public:
+      /** Class \Ref{DjVmDir::File} describes the directory records that
+          class \Ref{DjVmDir} manages. */
+   class File;
+   static const int version;   // value is defined outside this header
+      /** Allocates a new #DjVmDir# and hands it back as a
+          #GP<DjVmDir>#. */
+   static GP<DjVmDir> create() { return new DjVmDir; }
+
+      /** Decodes the directory from the given stream. */
+   void decode(const GP<ByteStream> &stream);
+      /** Encodes the directory into the given stream. */
+   void encode(const GP<ByteStream> &stream, const bool do_rename=false) const;
+      /** Encodes the directory into the given stream, explicitly as bundled or indirect. */
+   void encode(const GP<ByteStream> &stream, const bool bundled, const bool do_rename) const;
+      /** Tells whether the directory defines an {\em indirect} document. */
+   bool is_indirect() const;
+      /** Tells whether the directory defines a {\em bundled} document. */
+   bool is_bundled() const;
+      /** Maps a page number to its file record. */
+   GP<File> page_to_file(int page_num) const;
+      /** Maps a file name to its file record. */
+   GP<File> name_to_file(const GUTF8String &name) const;
+      /** Maps a file ID to its file record. */
+   GP<File> id_to_file(const GUTF8String &id) const;
+      /** Maps a file title (shortcut) to its file record. */
+   GP<File> title_to_file(const GUTF8String &title) const;
+      /** Returns the position of file #f# in the directory. */
+   int get_file_pos(const File *f) const;
+      /** Returns the position of page #page_num# in the directory. */
+   int get_page_pos(int page_num) const;
+      /** Checks for duplicate names and resolves them. */
+   GPList<File> resolve_duplicates(const bool save_as_bundled);
+      /** Returns a copy of the list of file records. */
+   GPList<File> get_files_list() const;
+      /** Returns how many file records there are. */
+   int get_files_num() const;
+      /** Returns how many file records represent pages. */
+   int get_pages_num() const;
+      /** Returns the file record carrying the #SHARED_ANNO# flag.
+        Note that a multipage DjVu document may contain only one
+        file with shared annotations. */
+   GP<File> get_shared_anno_file() const;
+      /** Sets the title of the file whose ID is #id#. */
+   void set_file_title(const GUTF8String &id, const GUTF8String &title);
+      /** Sets the name of the file whose ID is #id#. */
+   void set_file_name(const GUTF8String &id, const GUTF8String &name);
+      /** Inserts file record #file# at position #pos#.  Passing #-1#
+        for #pos# means to append.  The position actually used is
+        returned. */
+   int insert_file(const GP<File> &file, int pos=-1);
+      /** Removes the file record whose ID is #id#. */
+   void delete_file(const GUTF8String &id);
+private:
+   GCriticalSection class_lock;   // taken by is_indirect() in this header
+   GPList<File> files_list;
+   GPArray<File> page2file;
+   GPMap<GUTF8String, File> name2file;
+   GPMap<GUTF8String, File> id2file;
+   GPMap<GUTF8String, File> title2file;
+private: // dummy overloads taking raw pointers; not part of the public API
+   static void decode(ByteStream *);
+   static void encode(ByteStream *);
+};
+
+class DjVmDir::File : public GPEnabled
+{
+public:
+  // Note: INCLUDE below must be zero and PAGE must be one.
+  // This is to avoid problems with the File constructor, which now takes
+  // 'int file_type' as the last argument instead of 'bool is_page'.
+
+  /** File type. Possible file types are:
+     \begin{description}
+       \item[PAGE] This is a top level page file. It may include other
+         #INCLUDE#d files, which may in turn be shared between
+         different pages.
+       \item[INCLUDE] This file is included into some other file inside
+         this document.
+       \item[THUMBNAILS] This file contains thumbnails for the document
+         pages.
+       \item[SHARED_ANNO] This file contains annotations shared by
+         all the pages. It's supposed to be included into every page
+         for the annotations to take effect. There may be only one
+         file with shared annotations in a document.
+     \end{description} */
+  enum FILE_TYPE { INCLUDE=0, PAGE=1, THUMBNAILS=2, SHARED_ANNO=3 };
+protected:
+  /** Default constructor.  Protected: records are obtained via #create#. */
+  File(void);
+
+public: // factories returning a #GP<File>#; the constructor is protected
+  static GP<File> create(void) { return new File(); }
+  static GP<File> create(const GUTF8String &load_name,
+     const GUTF8String &save_name, const GUTF8String &title,
+     const FILE_TYPE file_type);
+
+  /** Check for filenames that are not valid for the native encoding,
+      and change them. */
+  const GUTF8String &check_save_name(const bool as_bundled);
+
+  /** File name.  The optional file name must be unique and is the name
+      that will be used when the document is saved to an indirect file.
+      If not assigned, the value of #id# will be used for this purpose.
+      By keeping the name in a {\em bundled} document we guarantee that it
+      can be expanded later into an {\em indirect} document and files will
+      still have the same names, if the name is legal on a given filesystem.
+    */
+  const GUTF8String &get_save_name(void) const;
+
+  /** File identifier.  The encoder assigns a unique identifier to each file
+      in a multipage document. This is the name used when loading files.
+      Indirection chunks in other files (#"INCL"# chunks) may refer to another
+      file using its identifier. */
+  const GUTF8String &get_load_name(void) const;
+  void set_load_name(const GUTF8String &id);
+
+  /** File title.  The file title is assigned by the user and may be used as
+      a shortcut for viewing a particular page.  Names like #"chapter1"# or
+      #"appendix"# are appropriate.  An empty title reads back as the ID. */
+  const GUTF8String &get_title() const;
+  void set_title(const GUTF8String &xtitle);
+
+  /** Reports an ascii string indicating file type. */
+  GUTF8String get_str_type(void) const;
+
+  /** Offset of the file data in a bundled DJVM file.  This number is
+      relevant in the {\em bundled} case only when everything is packed into
+      one single file. */
+  int offset;
+
+  /** Size of the file data in a bundled DJVM file.  This number is
+      relevant in the {\em bundled} case only when everything is
+      packed into one single file. */
+  int size;
+
+  /** Have we checked the saved file name, to see if it is valid on the
+      local disk? */
+  bool valid_name;
+
+  /** Tests if this file represents a page of the document. */
+  bool is_page(void) const 
+  {
+    return (flags & TYPE_MASK)==PAGE;
+  }
+
+  /** Returns #TRUE# if this file is included into some other files of
+      this document. */
+  bool is_include(void) const
+  {
+    return (flags & TYPE_MASK)==INCLUDE;
+  }
+
+  /** Returns #TRUE# if this file contains thumbnails for the document pages. */
+  bool is_thumbnails(void) const
+  {
+    return (flags & TYPE_MASK)==THUMBNAILS;
+  }
+
+  /** Returns #TRUE# if this file has the #SHARED_ANNO# file type. */
+  bool is_shared_anno(void) const
+  { return (flags & TYPE_MASK)==SHARED_ANNO; }
+
+  /** Returns the page number of this file. This function returns
+      #-1# if this file does not represent a page of the document. */
+  int get_page_num(void) const { return page_num; }
+protected:
+  GUTF8String name;      // presumably the save name -- confirm in DjVmDir.cpp
+  GUTF8String oldname;   // NOTE(review): use not visible in this header
+  GUTF8String id;        // returned by get_load_name()
+  GUTF8String title;     // returned by get_title() when non-empty
+  void set_save_name(const GUTF8String &name);
+private:
+      friend class DjVmDir;
+      enum FLAGS_0 { IS_PAGE_0=1, HAS_NAME_0=2, HAS_TITLE_0=4 }; // presumably the obsolete version 0 bits
+      enum FLAGS_1 { HAS_NAME=0x80, HAS_TITLE=0x40, TYPE_MASK=0x3f }; // name bit, title bit, FILE_TYPE mask
+      unsigned char flags;   // low bits (TYPE_MASK) hold the FILE_TYPE
+      int page_num;
+};
+
+// The load name of a record is simply its identifier string.
+inline const GUTF8String &DjVmDir::File::get_load_name(void) const
+{ return this->id; }
+
+// Yields the title, or the identifier when the title has zero length.
+inline const GUTF8String &DjVmDir::File::get_title() const
+{ return title.length() ? title : id; }
+
+inline void DjVmDir::File::set_title(const GUTF8String &xtitle)
+{ this->title = xtitle; } // plain assignment of the new title
+
+/** @name Format of the DIRM chunk.
+
+    {\bf Variants} --- There are two versions of the #"DIRM"# chunk format.
+    The version number is identified by the seven low bits of the first byte
+    of the chunk.  Version {\bf 0} is obsolete and should never be used.  This
+    section describes version {\bf 1}.  There are two major multipage DjVu
+    formats supported: {\em bundled} and {\em indirect}.  The #"DIRM"# chunk
+    indicates which format is used in the most significant bit of the first
+    byte of the chunk.  The document is bundled when this bit is set.
+    Otherwise the document is indirect.
+
+    {\bf Unencoded data} --- The #"DIRM"# chunk is composed of some unencoded
+    data followed by \Ref{bzz} encoded data.  The unencoded data starts with
+    the version byte and a 16 bit integer representing the number of component
+    files.  All integers are encoded with the most significant byte first.
+    \begin{verbatim}
+          BYTE:             Flags/Version:  0b<bundled>0000001
+          INT16:            Number of component files.
+    \end{verbatim}
+    When the document is a bundled document (i.e. the flag #bundled# is set),
+    this header is followed by the offsets of each of the component files within
+    the #"FORM:DJVM"#.  These offsets allow for random component file access.
+    \begin{verbatim}
+          INT32:            Offset of first component file.
+          INT32:            Offset of second component file.
+          ...
+          INT32:            Offset of last component file.
+    \end{verbatim}
+
+    {\bf BZZ encoded data} --- The rest of the chunk is entirely compressed
+    with the BZZ general purpose compressor.  We now describe the data fed
+    into (or retrieved from) the BZZ codec (cf. \Ref{BSByteStream}.)  First
+    come the sizes and the flags associated with each component file.
+    \begin{verbatim}
+          INT24:             Size of the first component file.
+          INT24:             Size of the second component file.
+          ...
+          INT24:             Size of the last component file.
+          BYTE:              Flag byte for the first component file.
+          BYTE:              Flag byte for the second component file.
+          ...
+          BYTE:              Flag byte for the last component file.
+    \end{verbatim}
+    The flag bytes have the following format:
+    \begin{verbatim}
+          0b<hasname><hastitle>000000     for a file included by other files.
+          0b<hasname><hastitle>000001     for a file representing a page.
+          0b<hasname><hastitle>000010     for a file containing thumbnails.
+    \end{verbatim}
+    Flag #hasname# is set when the name of the file is different from the file
+    ID.  Flag #hastitle# is set when the title of the file is different from
+    the file ID.  These flags are used to avoid encoding the same string three
+    times.  Then comes a sequence of zero terminated strings.  There are one to
+    three such strings per component file.  The first string contains the ID
+    of the component file.  The second string contains the name of the
+    component file.  It is only present when the flag #hasname# is set. The third
+    one contains the title of the component file. It is only present when the
+    flag #hastitle# is set. The \Ref{bzz} encoding system makes sure that 
+    all these strings will be encoded efficiently despite their possible
+    redundancies.
+    \begin{verbatim}
+          ZSTR:     ID of the first component file.
+          ZSTR:     Name of the first component file (only if #hasname# is set.)
+          ZSTR:     Title of the first component file (only if #hastitle# is set.)
+          ... 
+          ZSTR:     ID of the last component file.
+          ZSTR:     Name of the last component file (only if #hasname# is set.)
+          ZSTR:     Title of the last component file (only if #hastitle# is set.)
+    \end{verbatim}
+
+    @memo Description of the format of the DIRM chunk.  */
+//@}
+
+
+
+// -------------- IMPLEMENTATION
+
+
+// A directory is bundled exactly when is_indirect() reports false.
+inline bool DjVmDir::is_bundled(void) const
+{
+  return is_indirect() ? false : true;
+}
+
+// True when the list is non-empty and its first record is non-null with offset 0.
+inline bool DjVmDir::is_indirect(void) const
+{
+  GCriticalSectionLock guard(const_cast<GCriticalSection *>(&class_lock));
+  if (! files_list.size()) return false;   // no records at all
+  return files_list[files_list] != 0 && files_list[files_list]->offset == 0;
+}
+
+
+
+// ----- THE END
+
+#ifdef HAVE_NAMESPACES
+}
+# ifndef NOT_USING_DJVU_NAMESPACE
+using namespace DJVU;
+# endif
+#endif
+#endif