diff options
Diffstat (limited to 'kviewshell/plugins/djvu/libdjvu/DataPool.h')
-rw-r--r-- | kviewshell/plugins/djvu/libdjvu/DataPool.h | 627 |
1 files changed, 627 insertions, 0 deletions
diff --git a/kviewshell/plugins/djvu/libdjvu/DataPool.h b/kviewshell/plugins/djvu/libdjvu/DataPool.h new file mode 100644 index 00000000..fb4bea4e --- /dev/null +++ b/kviewshell/plugins/djvu/libdjvu/DataPool.h @@ -0,0 +1,627 @@ +//C- -*- C++ -*- +//C- ------------------------------------------------------------------- +//C- DjVuLibre-3.5 +//C- Copyright (c) 2002 Leon Bottou and Yann Le Cun. +//C- Copyright (c) 2001 AT&T +//C- +//C- This software is subject to, and may be distributed under, the +//C- GNU General Public License, Version 2. The license should have +//C- accompanied the software or you may obtain a copy of the license +//C- from the Free Software Foundation at http://www.fsf.org . +//C- +//C- This program is distributed in the hope that it will be useful, +//C- but WITHOUT ANY WARRANTY; without even the implied warranty of +//C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//C- GNU General Public License for more details. +//C- +//C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library +//C- distributed by Lizardtech Software. On July 19th 2002, Lizardtech +//C- Software authorized us to replace the original DjVu(r) Reference +//C- Library notice by the following text (see doc/lizard2002.djvu): +//C- +//C- ------------------------------------------------------------------ +//C- | DjVu (r) Reference Library (v. 3.5) +//C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved. +//C- | The DjVu Reference Library is protected by U.S. Pat. No. +//C- | 6,058,214 and patents pending. +//C- | +//C- | This software is subject to, and may be distributed under, the +//C- | GNU General Public License, Version 2. The license should have +//C- | accompanied the software or you may obtain a copy of the license +//C- | from the Free Software Foundation at http://www.fsf.org . 
+//C- | +//C- | The computer code originally released by LizardTech under this +//C- | license and unmodified by other parties is deemed "the LIZARDTECH +//C- | ORIGINAL CODE." Subject to any third party intellectual property +//C- | claims, LizardTech grants recipient a worldwide, royalty-free, +//C- | non-exclusive license to make, use, sell, or otherwise dispose of +//C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the +//C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU +//C- | General Public License. This grant only confers the right to +//C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to +//C- | the extent such infringement is reasonably necessary to enable +//C- | recipient to make, have made, practice, sell, or otherwise dispose +//C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to +//C- | any greater extent that may be necessary to utilize further +//C- | modifications or combinations. +//C- | +//C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY +//C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +//C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF +//C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. +//C- +------------------------------------------------------------------ +// +// $Id: DataPool.h,v 1.10 2003/11/07 22:08:20 leonb Exp $ +// $Name: release_3_5_15 $ + +#ifndef _DATAPOOL_H +#define _DATAPOOL_H +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#if NEED_GNUG_PRAGMAS +# pragma interface +#endif + + +#include "GThreads.h" +#include "GString.h" +#include "GURL.h" + +#ifdef HAVE_NAMESPACES +namespace DJVU { +# ifdef NOT_DEFINED // Just to fool emacs c++ mode +} +#endif +#endif + +class ByteStream; + +/** @name DataPool.h + Files #"DataPool.h"# and #"DataPool.cpp"# implement classes \Ref{DataPool} + and \Ref{DataRange} used by DjVu decoder to access data. 
+ + The main goal of class \Ref{DataPool} is to provide concurrent access + to the same data from many threads with a possibility to add data + from yet another thread. It is especially important in the case of the + Netscape plugin when data is not immediately available, but decoding + should be started as soon as possible. In this situation it is vital + to provide transparent access to the data from many threads possibly + blocking readers that try to access information that has not been + received yet. + + When the data is local though, it can be accessed directly using + standard IO mechanism. To provide a uniform interface for decoding + routines, \Ref{DataPool} supports file mode as well. + + @memo Thread safe data storage + @author Andrei Erofeev <[email protected]> + @version #$Id: DataPool.h,v 1.10 2003/11/07 22:08:20 leonb Exp $# +*/ + +//@{ + +/** Thread safe data storage. + The purpose of #DataPool# is to provide a uniform interface for + accessing data from decoding routines running in a multi-threaded + environment. Depending on the mode of operation it may contain the + actual data, may be connected to another #DataPool# or may be mapped + to a file. Regardless of the mode, the class returns data in a + thread-safe way, blocking reading threads if there is no data of + interest available. This blocking is especially useful in the + networking environment (plugin) when there is a running decoding thread, + which wants to start decoding as soon as there is just one byte available + blocking if necessary. + + Access to data in a #DataPool# may be direct (Using \Ref{get_data}() + function) or sequential (See \Ref{get_stream}() function). + + If the #DataPool# is not connected to anything, that is it contains + some real data, this data can be added to it by means of two + \Ref{add_data}() functions. One of them adds data sequentially maintaining + the offset of the last block of data added by it. The other can store + data anywhere. 
Thus it's important to realize, that there may be "white + spots" in the data storage. + + There is also a way to test if data is available for some given data + range (See \Ref{has_data}()). In addition to this mechanism, there are + so-called {\em trigger callbacks}, which are called, when there is + all data available for a given data range. + + Let us consider all modes of operation in detail: + + \begin{enumerate} + \item {\bf Not connected #DataPool#}. In this mode the #DataPool# + contains some real data. As mentioned above, it may be added + by means of two functions \Ref{add_data}() operating independently + of each other and allowing to add data sequentially and + directly to any place of data storage. It's important to call + function \Ref{set_eof}() after all data has been added. + + Functions like \Ref{get_data}() or \Ref{get_stream}() can + be used to obtain direct or sequential access to the data. As + long as \Ref{is_eof}() is #FALSE#, #DataPool# will block every + reader, which is trying to read unavailable data until it + really becomes available. But as soon as \Ref{is_eof}() is + #TRUE#, any attempt to read non-existing data will read #0# bytes. + + Taking into account the fact, that #DataPool# was designed to + store DjVu files, which are in IFF format, it becomes possible + to predict the size of the #DataPool# as soon as the first + #32# bytes have been added. This is invaluable for estimating + download progress. See function \Ref{get_length}() for details. + If this estimate fails (which means, that stored data is not + in IFF format), \Ref{get_length}() returns #-1#. + + Triggers may be added and removed by means of \Ref{add_trigger}() + and \Ref{del_trigger}() functions. \Ref{add_trigger}() takes + a data range. As soon as all data in that data range is + available, the trigger callback will be called. + + All trigger callbacks will be called when #EOF# condition + has been set. + + \item {\bf #DataPool# connected to another #DataPool#}.
In this + {\em slave} mode you can map a given #DataPool# to any offsets + range inside another #DataPool#. You can connect the slave + #DataPool# even if there is no data in the master #DataPool#. + Any \Ref{get_data}() request will be forwarded to the master + #DataPool#, and it will be responsible for blocking readers + trying to access unavailable data. + + The usage of \Ref{add_data}() functions is prohibited for + connected #DataPool#s. + + The offsets range used to map a slave #DataPool# can be fully + specified (both start offset and length are positive numbers) + or partially specified (the length is negative). In this mode + the slave #DataPool# is assumed to extend up to the end + of the master #DataPool#. + + Triggers may be used with slave #DataPool#s as well as with + the master ones. + + Calling \Ref{stop}() function of a slave will stop only the slave + (and any other slave connected to it), but not the master. + + \Ref{set_eof}() function is meaningless for slaves. They obtain + the #ByteStream::EndOfFile# status from their master. + + Depending on the offsets range passed to the constructor, + \Ref{get_length}() returns different values. If the length + passed to the constructor was positive, then it is returned + by \Ref{get_length}() all the time. Otherwise the value returned + is either #-1# if master's length is still unknown (it didn't + manage to parse IFF data yet) or it is calculated as + #masters_length-slave_start#. + + \item {\bf #DataPool# connected to a file}. This mode is quite similar + to the case, when the #DataPool# is connected to another + #DataPool#. Similarly, the #DataPool# stores no data inside. + It just forwards all \Ref{get_data}() requests to the underlying + source (a file in this case). Thus these requests will never + block the reader. But they may return #0# if there is no data + available at the requested offset. + + The usage of \Ref{add_data}() functions is meaningless and + is prohibited. 
+ + \Ref{is_eof}() function always returns #TRUE#. Thus \Ref{set_eof}() + is meaningless and does nothing. + + \Ref{get_length}() function always returns the file size. + + Calling \Ref{stop}() function will stop this #DataPool# and + any other slave connected to it. + + Trigger callbacks passed through \Ref{add_trigger}() function + are called immediately. + + This mode is useful to read and decode DjVu files without reading + and storing them in full in memory. + \end{enumerate} +*/ + +class DataPool : public GPEnabled +{ +public: // Classes used internally by DataPool + // These are declared public to support buggy C++ compilers. + class Incrementor; + class Reader; + class Trigger; + class OpenFiles; + class OpenFiles_File; + class BlockList; + class Counter; +protected: + DataPool(void); + +public: + /** @name Initialization */ + //@{ + /** Default creator. Will prepare #DataPool# for accepting data + added through functions \Ref{add_data}(). Use \Ref{connect}() + functions if you want to map this #DataPool# to another or + to a file. */ + static GP<DataPool> create(void); + + /** Creates and initializes the #DataPool# with data from stream #str#. + The constructor will read the stream's contents and add them + to the pool using the \Ref{add_data}() function. Afterwards it + will call \Ref{set_eof}() function, and no other data will be + allowed to be added to the pool. */ + static GP<DataPool> create(const GP<ByteStream> & str); + + /** Initializes the #DataPool# in slave mode and connects it + to the specified offsets range of the specified master #DataPool#. + It is equivalent to calling default constructor and function + \Ref{connect}(). + + @param master_pool Master #DataPool# providing data for this slave + @param start Beginning of the offsets range which the slave is + mapped into + @param length Length of the offsets range. If negative, the range + is assumed to extend up to the end of the master #DataPool#. 
+ */ + static GP<DataPool> create(const GP<DataPool> & master_pool, int start=0, int length=-1); + + /** Initializes the #DataPool# in slave mode and connects it + to the specified offsets range of the specified file. + It is equivalent to calling default constructor and function + \Ref{connect}(). + @param url Name of the file to connect to. + @param start Beginning of the offsets range which the #DataPool# is + mapped into + @param length Length of the offsets range. If negative, the range + is assumed to extend up to the end of the file. + */ + static GP<DataPool> create(const GURL &url, int start=0, int length=-1); + + virtual ~DataPool(); + + /** Switches the #DataPool# to slave mode and connects it to the + specified offsets range of the master #DataPool#. + @param master_pool Master #DataPool# providing data for this slave + @param start Beginning of the offsets range which the slave is + mapped into + @param length Length of the offsets range. If negative, the range + is assumed to extend up to the end of the master #DataPool#. + */ + void connect(const GP<DataPool> & master_pool, int start=0, int length=-1); + /** Connects the #DataPool# to the specified offsets range of + the named #url#. + @param url Name of the file to connect to. + @param start Beginning of the offsets range which the #DataPool# is + mapped into + @param length Length of the offsets range. If negative, the range + is assumed to extend up to the end of the file. + */ + void connect(const GURL &url, int start=0, int length=-1); + //@} + + /** Tells the #DataPool# to stop serving readers. + + If #only_blocked# flag is #TRUE# then only those requests will + be processed, which would not block. Any attempt to get non-existing + data would result in a #STOP# exception (instead of blocking until + data is available). 
+ + If #only_blocked# flag is #FALSE# then any further attempt to read + from this #DataPool# (as well as from any #DataPool# connected + to this one) will result in a #STOP# exception. */ + void stop(bool only_blocked=false); + + /** @name Adding data. + Please note, that these functions are for not connected #DataPool#s + only. You can not add data to a #DataPool#, which is connected + to another #DataPool# or to a file. + */ + //@{ + /** Appends the new block of data to the #DataPool#. There are two + \Ref{add_data}() functions available. One is for adding data + sequentially. It keeps track of the last byte position, which has + been stored {\bf by it} and always appends the next block after + this position. The other \Ref{add_data}() can store data anywhere. + + The function will unblock readers waiting for data if this data + arrives with this block. It may also trigger some {\em trigger + callbacks}, which may have been added by means of \Ref{add_trigger}() + function. + + {\bf Note:} After all the data has been added, it's necessary + to call \Ref{set_eof}() to tell the #DataPool# that nothing else + is expected. + + {\bf Note:} This function may not be called if the #DataPool# + has been connected to something. + + @param buffer data to append + @param size length of the {\em buffer} + */ + void add_data(const void * buffer, int size); + + /** Stores the specified block of data at the specified offset. + Like the function above this one can also unblock readers + waiting for data and engage trigger callbacks. The difference + is that {\bf this} function can store data anywhere. + + {\bf Note:} After all the data has been added, it's necessary + to call \Ref{set_eof}() to tell the #DataPool# that nothing else + is expected. + + {\bf Note:} This function may not be called if the #DataPool# + has been connected to something. 
+ + @param buffer data to store + @param offset where to store the data + @param size length of the {\em buffer} */ + void add_data(const void * buffer, int offset, int size); + + /** Tells the #DataPool# that all data has been added and nothing else + is anticipated. When #EOF# is true, any reader attempting to read + non existing data will not be blocked. It will either read #ZERO# + bytes or will get an #ByteStream::EndOfFile# exception (see \Ref{get_data}()). + Calling this function will also activate all registered trigger + callbacks. + + {\bf Note:} This function is meaningless and does nothing + when the #DataPool# is connected to another #DataPool# or to + a file. */ + void set_eof(void); + //@} + + /** @name Accessing data. + These functions provide direct and sequential access to the + data of the #DataPool#. If the #DataPool# is not connected + (contains some real data) then it handles the requests itself. + Otherwise they are forwarded to the master #DataPool# or the file. + */ + //@{ + /** Attempts to return a block of data at the given #offset# + of the given #size#. + + \begin{enumerate} + \item If the #DataPool# is connected to another #DataPool# or + to a file, the request will just be forwarded to them. + \item If the #DataPool# is not connected to anything and + some of the data requested is in the internal buffer, + the function copies available data to #buffer# and returns + immediately. + + If there is no data available, and \Ref{is_eof}() returns + #FALSE#, the reader (and the thread) will be {\bf blocked} + until the data actually arrives. Please note, that since + the reader is blocked, it should run in a separate thread + so that other threads have a chance to call \Ref{add_data}(). 
+ If there is no data available, but \Ref{is_eof}() is #TRUE# + the behavior is different and depends on the #DataPool#'s + estimate of the file size: + \begin{itemize} + \item If #DataPool# learns from the IFF structure of the + data, that its size should be greater than it + really is, then any attempt to read non-existing + data in the range of {\em valid} offsets will + result in an #ByteStream::EndOfFile# exception. This is done to + indicate, that there was an error in adding data, + and the data requested is {\bf supposed} to be + there, but has actually not been added. + \item If #DataPool#'s expectations about the data size + coincide with the reality then any attempt to + read data beyond the legal range of offsets will + result in #ZERO# bytes returned. + \end{itemize}. + \end{enumerate}. + + @param buffer Buffer to be filled with data + @param offset Offset in the #DataPool# to read data at + @param size Size of the {\em buffer} + @return The number of bytes actually read + @exception STOP The stream has been stopped + @exception EOF The requested data is not there and will not be added, + although it should have been. + */ + int get_data(void * buffer, int offset, int size); + + /** Returns a \Ref{ByteStream} to access contents of the #DataPool# + sequentially. By reading from the returned stream you basically + call \Ref{get_data}() function. Thus, everything said for it + remains true for the stream too. */ + GP<ByteStream> get_stream(void); + //@} + + /** @name State querying functions. */ + //@{ + /** Returns #TRUE# if this #DataPool# is connected to another #DataPool# + or to a file. */ + bool is_connected(void) const; + + /** Returns #TRUE# if all data is available for offsets from + #start# till #start+length-1#. If #length# is negative, the + range is assumed to extend up to the end of the #DataPool#. + This function works both for connected and not connected #DataPool#s. 
+ Once it returned #TRUE# for some offsets range, you can be + sure that the subsequent \Ref{get_data}() request will not block. + */ + bool has_data(int start, int length); + + /** Returns #TRUE# if no more data is planned to be added. + + {\bf Note:} This function always returns #TRUE# when the #DataPool# + has been initialized with a file name. */ + bool is_eof(void) const {return eof_flag;} + + /** Returns the {\em length} of data in the #DataPool#. The value + returned depends on the mode of operation: + \begin{itemize} + \item If the #DataPool# is not connected to anything then + the length returned is either calculated by interpreting + the IFF structure of stored data (if successful) or + by calculating the real size of data after \Ref{set_eof}() + has been called. Otherwise it is #-1#. + \item If the #DataPool# is connected to a file, the length + is calculated basing on the length passed to the + \Ref{connect}() function and the file size. + \item If the #DataPool# is connected to a master #DataPool#, + the length is calculated basing on the value returned + by the master's #get_length()# function and the length + passed to the \Ref{connect}() function. + \end{itemize}. */ + int get_length(void) const; + /** Returns the number of bytes of data available in this #DataPool#. + Contrary to the \Ref{get_length}() function, this one doesn't try + to interpret the IFF structure and predict the file length. + It just returns the number of bytes of data really available inside + the #DataPool#, if it contains data, or inside its range, if it's + connected to another #DataPool# or a file. */ + int get_size(void) const {return get_size(0, -1);} + //@} + + /** @name Trigger callbacks. + {\em Trigger callbacks} are special callbacks called when + all data for the given range of offsets has been made available. 
+ Since reading unavailable data may result in a thread block, + which may be bad, the usage of {\em trigger callbacks} appears + to be a convenient way to signal availability of data. + + You can add a trigger callback in two ways: + \begin{enumerate} + \item By specifying a range. This is the most general case + \item By providing just one {\em threshold}. In this case + the range is assumed to start from offset #ZERO# and + last for {\em threshold}+1 bytes. + \end{enumerate} + */ + //@{ + /** Associates the specified {\em trigger callback} with the + given data range. + + {\bf Note:} The callback may be called immediately if all + data for the given range is already available or #EOF# is #TRUE#. + + @param start The beginning of the range for which all data + should be available + @param length If the {\em length} is not negative then the callback + will be called when there is data available for every + offset from {\em start} to {\em start+length-1}. + If {\em length} is negative, the callback is called after + #EOF# condition has been set. + @param callback Function to call + @param cl_data Argument to pass to the callback when it's called. */ + void add_trigger(int start, int length, +// void (* callback)(GP<GPEnabled> &), GP<GPEnabled> cl_data); + void (* callback)(void *), void * cl_data); + + /** Associates the specified {\em trigger callback} with the + specified threshold. + + This function is a simplified version of the function above. + The callback will be called when there is data available for + every offset from #0# to #thresh#, if #thresh# is positive, or + when #EOF# condition has been set otherwise. */ +// void add_trigger(int thresh, void (* callback)(GP<GPEnabled> &), GP<GPEnabled> cl_data); + void add_trigger(int thresh, void (* callback)(void *), void * cl_data); + + /** Use this function to unregister callbacks, which are no longer + needed. {\bf Note!} It's important to do it when the client + is about to be destroyed. 
*/ + void del_trigger(void (* callback)(void *), void * cl_data); +// void del_trigger(void (* callback)(GP<GPEnabled> &), GP<GPEnabled> cl_data); + //@} + + /** Loads data from the file into memory. This function is only useful + for #DataPool#s getting data from a file. It descends the #DataPool#s + hierarchy until it either reaches a file-connected #DataPool# + or #DataPool# containing the real data. In the latter case it + does nothing, in the first case it makes the #DataPool# read all + data from the file into memory and stop using the file. + + This may be useful when you want to overwrite the file and leave + existing #DataPool#s with valid data. */ + void load_file(void); + /** This function will make every #DataPool# in the program, which + is connected to a file, to load the file contents to the main + memory and close the file. This feature is important when you + want to do something with the file like remove or overwrite it + not affecting the rest of the program. */ + static void load_file(const GURL &url); + + /** This function will remove OpenFiles filelist. */ + static void close_all(void); + + // Internal. Used by 'OpenFiles' + void clear_stream(const bool release = true); + + /** Useful in comparing data pools. Returns true if derived from + same URL or bytestream. 
*/ + bool simple_compare(DataPool &pool) const; +private: + bool eof_flag; + bool stop_flag; + bool stop_blocked_flag; + + Counter *active_readers; + + // Source or storage of data + GP<DataPool> pool; + GURL furl; + GP<OpenFiles_File> fstream; + GCriticalSection class_stream_lock; + GP<ByteStream> data; + GCriticalSection data_lock; + BlockList *block_list; + int add_at; + int start, length; + + // List of readers waiting for data + GPList<Reader> readers_list; + GCriticalSection readers_lock; + + // Triggers + GPList<Trigger> triggers_list; // List of passed or our triggers + GCriticalSection triggers_lock; // Lock for the list above + GCriticalSection trigger_lock; // Lock for static_trigger_cb() + + void init(void); + void wait_for_data(const GP<Reader> & reader); + void wake_up_all_readers(void); + void check_triggers(void); + int get_data(void * buffer, int offset, int size, int level); + int get_size(int start, int length) const; + void restart_readers(void); + +// static void static_trigger_cb(GP<GPEnabled> &); + static void static_trigger_cb(void *); + void trigger_cb(void); + void analyze_iff(void); + void added_data(const int offset, const int size); +public: + static const char *Stop; + friend class FCPools; +}; + +inline bool +DataPool::simple_compare(DataPool &pool) const +{ + // return true if these pools are identical. False means they may or may + // not be identical. + return (this == &pool) + ||(furl.is_valid()&&!furl.is_empty()&&pool.furl.is_valid()&&(furl == pool.furl)) + ||(data && (data == pool.data)); +} + +inline bool +DataPool::is_connected(void) const +{ + return furl.is_local_file_url() || pool!=0; +} + +//@} + + +#ifdef HAVE_NAMESPACES +} +# ifndef NOT_USING_DJVU_NAMESPACE +using namespace DJVU; +# endif +#endif +#endif |