diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordCursor.h')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htword/WordCursor.h | 445 |
1 files changed, 445 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h new file mode 100644 index 00000000..ba6e9732 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h @@ -0,0 +1,445 @@ +// +// WordCursor.h +// +// NAME +// +// search specification and results for WordList. +// +// SYNOPSIS +// +// #include <WordList.h> +// +// int callback(WordList *, WordDBCursor& , const WordReference *, Object &) +// { +// ... +// } +// +// Object* data = ... +// +// WordList *words = ...; +// +// WordCursor *search = words->Cursor(callback, data); +// WordCursor *search = words->Cursor(WordKey("word <DEF> <UNDEF> <UNDEF>")); +// WordCursor *search = words->Cursor(WordKey("word <DEF> <UNDEF> <UNDEF>"), callback, data); +// +// ... +// +// if(search->Walk() == NOTOK) bark; +// List* results = search->GetResults(); +// +// if(search->WalkNext() == OK) +// dosomething(search->GetFound()); +// +// DESCRIPTION +// +// WordCursor is an iterator on an inverted index. It is created by +// calling the <i>Cursor</i> method of a <i>WordList</i> object. There is +// no other way to create a WordCursor object. +// When the <i>Walk*</i> methods return, +// the WordCursor object contains the result of the search and +// status information that indicates if it reached the end of +// the list (IsAtEnd() method). +// +// The <b>callback</b> function that is called each time a match is +// found takes the following arguments: +// <pre> +// WordList* words pointer to the inverted index handle. +// WordDBCursor& cursor to call Del() and delete the current match +// WordReference* wordRef is the match +// Object& data is the user data provided by the caller when +// search began. +// </pre> +// +// The <i>WordKey</i> object that specifies the search criterion +// may be used as follows (assuming word is followed by DOCID and +// LOCATION): +// +// Ex1: <b>WordKey("word <DEF> <UNDEF> <UNDEF>")</b> find all occurrences +// of <i>word</i>. 
+// +// Ex2: <b>WordKey("meet <UNDEF> <UNDEF> <UNDEF>")</b> find all occurrences +// starting with <i>meet</i>, including <i>meeting</i> etc. +// +// Ex3: <b>WordKey("meet <DEF> <UNDEF> 1")</b> find all occurrences of +// <i>meet</i> that occur at LOCATION 1 in any DOCID. This can +// be inefficient since the search has to scan all occurrences +// of <i>meet</i> to find the ones that occur at LOCATION 1. +// +// Ex4: <b>WordKey("meet <DEF> 2 <UNDEF>")</b> find all occurrences of +// <i>meet</i> that occur in DOCID 2, at any location. +// +// Interface functions are virtual so that a derivation of the +// class is possible. Some functions are meant to be used by derived +// classes such as the <b>Initialize</b> function. All data members +// should be accessed using the corresponding accessor if possible. +// +// END +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: WordCursor.h,v 1.4 2004/05/28 13:15:26 lha Exp $ +// + +#ifndef _WordCursor_h_ +#define _WordCursor_h_ + +#ifndef SWIG +#include "htString.h" +#include "WordKey.h" +#include "WordDB.h" + +class WordList; +class WordDBCursor; +#endif /* SWIG */ +// +// Possible values of the action argument of WordList::Walk +// check walk function in WordList.cc for info on these: +// +#define HTDIG_WORDLIST_COLLECTOR 0x0001 +#define HTDIG_WORDLIST_WALKER 0x0002 + +#ifndef SWIG +// +// Type of the callback argument in WordCursor +// +typedef int (*wordlist_walk_callback_t)(WordList *, WordDBCursor& , const WordReference *, Object &); +#endif /* SWIG */ + +// +// Possible values of the status member +// +// +// WalkNext reached the end of the matches +// +#define WORD_WALK_ATEND 0x0001 +// +// Failed to acquire Berkeley DB cursor +// +#define WORD_WALK_CURSOR_FAILED 
0x0002 +// +// Berkeley DB Get operation failed +// +#define WORD_WALK_GET_FAILED 0x0004 +// +// Callback function returned NOTOK +// +#define WORD_WALK_CALLBACK_FAILED 0x0008 +// +// WalkNextStep hit an entry that does not match the +// searched key. +// +#define WORD_WALK_NOMATCH_FAILED 0x0010 +// +// WordCursor contains undefined data +// +#define WORD_WALK_FAILED 0xffffffff + +// +// Possible return values of the IsA() method +// +#define WORD_CURSOR 1 +#define WORD_CURSORS 2 + +// +// Wordlist::Walk uses WordCursor for : +// state information : cursor +// search term description +// debug/trace/benchmarking +// search result format description +// +class WordCursor +{ + public: +#ifndef SWIG + // + // Private constructor. Creator of the object must then call Initialize() + // prior to using any other methods. + // + WordCursor() { Clear(); } + //- + // Private constructor. See WordList::Cursor method with same prototype for + // description. + // + WordCursor(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, WordKey(), callback, callback_data, HTDIG_WORDLIST_WALKER); } + //- + // Private constructor. See WordList::Cursor method with same prototype for + // description. + // + WordCursor(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { Clear(); Initialize(words, searchKey, 0, 0, action); } + //- + // Private constructor. See WordList::Cursor method with same prototype for + // description. + // + WordCursor(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER); } +#endif /* SWIG */ + virtual ~WordCursor() {} + //- + // Clear all data in object, set <b>GetResult()</b> data to NULL but + // do not delete it (the application is responsible for that). 
+ // + virtual void Clear(); + virtual void ClearInternal(); + virtual void ClearResult(); + + //- + // Returns the type of the object. May be overloaded by + // derived classes to differentiate them at runtime. + // Returns WORD_CURSOR. + // + virtual int IsA() const { return WORD_CURSOR; } + + //- + // Returns true if WalkNext() step entries in strictly increasing + // order, false if it step entries in random order. + // + virtual int Ordered() const { return 1; } + + //- + // Optimize the cursor before starting a Walk. + // Returns OK on success, NOTOK otherwise. + // + virtual int Optimize() { return OK; } + + //- + // Save in <b>buffer</b> all the information necessary to resume + // the walk at the point it left. The ASCII representation of the + // last key found (GetFound()) is written in <b>buffer</b> using the + // WordKey::Get method. + // + virtual int ContextSave(String& buffer) const { found.Get(buffer); return OK; } + //- + // Restore from buffer all the information necessary to + // resume the walk at the point it left. The <b>buffer</b> is expected + // to contain an ASCII representation of a WordKey (see WordKey::Set + // method). A <b>Seek</b> is done on the key and the object is prepared + // to jump to the next occurrence when <b>WalkNext</b> is called (the + // cursor_get_flags is set to <i>DB_NEXT.</i> + // + virtual int ContextRestore(const String& buffer); + +#ifndef SWIG + //- + // Walk and collect data from the index. + // Returns OK on success, NOTOK otherwise. + // + virtual int Walk(); +#endif /* SWIG */ + //- + // Must be called before other Walk methods are used. + // Fill internal state according to input parameters + // and move before the first matching entry. + // Returns OK on success, NOTOK otherwise. + // + virtual int WalkInit(); + //- + // Move before the first index matching entry. + // Returns OK on success, NOTOK otherwise. + // + virtual int WalkRewind(); + //- + // Move to the next matching entry. 
+ // At end of list, WORD_WALK_ATEND is returned. + // Returns OK on success, NOTOK otherwise. + // + virtual int WalkNext(); +#ifndef SWIG + //- + // Advance the cursor one step. The entry pointed to by the cursor may + // or may not match the requirements. Returns OK if entry pointed + // by cursor matches requirements. Returns NOTOK on + // failure. Returns WORD_WALK_NOMATCH_FAILED if the current entry + // does not match requirements, it's safe to call WalkNextStep again + // until either OK or NOTOK is returned. + // + virtual int WalkNextStep(); +#endif /* SWIG */ + //- + // Terminate Walk, free allocated resources. + // Returns OK on success, NOTOK otherwise. + // + virtual int WalkFinish(); + // + // Find out if cursor should better jump to the next possible key + // (DB_SET_RANGE) instead of sequential iterating (DB_NEXT). If it + // is decided that jump is a better move : cursor_set_flags = + // DB_SET_RANGE key = calculated next possible key Else do nothing + // Return OK if skipping successfull. Returns WORD_WALK_ATEND if no + // more possible match, reached the maximum. Returns + // WORD_WALK_FAILED on general failure, occurs if called and no + // skipping necessary. + // + int SkipUselessSequentialWalking(); + + //- + // Move before the inverted index position specified in <b>patch.</b> + // May only be called after a successfull call to the <i>WalkNext</i> + // or <i>WalkNextStep</i>method. + // Copy defined fields from <b>patch</b> into a copy of the + // <i>found</i> data member and + // initialize internal state so that <i>WalkNext</i> jumps to + // this key next time it's called (cursor_get_flag set to DB_SET_RANGE). + // Returns OK if successfull, NOTOK otherwise. + // + virtual int Seek(const WordKey& patch); + + //- + // Returns true if cursor is positioned after the last possible + // match, false otherwise. 
+ // + virtual int IsAtEnd() const { return status == WORD_WALK_ATEND; } + + // + // Accessors for input parameters + // + //- + // Returns the search criterion. + // + WordKey& GetSearch() { return searchKey; } +#ifndef SWIG + const WordKey& GetSearch() const { return searchKey; } +#endif /* SWIG */ + //- + // Returns the type of action when a matching entry + // is found. + // + int GetAction() const { return action; } + // + // Accessors for output parameters + // + //- + // Returns the list of WordReference found. The application + // is responsible for deallocation of the list. + // + List *GetResults() { return collectRes; } + //- + // For debugging purposes. Returns the list of WordReference hit + // during the search + // process. Some of them match the searched key, some don't. + // The application is responsible for deallocation of the list. + // + List *GetTraces() { return traceRes; } + //- + // For debugging purposes. Set the list of WordReference hit + // during the search process. + // + void SetTraces(List* traceRes_arg) { traceRes = traceRes_arg; } + //- + // Returns the last entry hit by the search. Only contains + // a valid value if the last <i>WalkNext</i> or <i>WalkNextStep</i> + // call was successfull (i.e. returned OK). + // + const WordReference& GetFound() { return found; } + //- + // Returns the number of occurrences of the searched word + // in the inverted index in the <b>noccurrence</b> parameter. + // Returns OK on success, NOTOK on failure. + // + virtual int Noccurrence(unsigned int& noccurrence) const; + +#ifndef SWIG + //- + // Convert the whole structure to an ASCII string description + // Returns OK if successfull, NOTOK otherwise. + // + virtual int Get(String& bufferout) const; + String Get() const { String tmp; Get(tmp); return tmp; } + + protected: + + //- + // Protected method. 
Derived classes should use this function to initialize + // the object if they do not call a WordCursor constructor in their own + // constructutor. Initialization may occur after the object is created + // and must occur before a <b>Walk*</b> method is called. See the + // DESCRIPTION section for the semantics of the arguments. + // Return OK on success, NOTOK on error. + // + int Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object * ncallback_data, int naction); + + // + // Input parameters + // + //- + // Input data. The key to be searched, see DESCRIPTION for more information. + // + WordKey searchKey; + // + // Input data. What do do when a WordReference is found. + // Can either be + // HTDIG_WORDLIST_COLLECTOR WordReference found stored in collectRes + // HTDIG_WORDLIST_WALKER callback is called for each WordReference found + // + int action; + + // + // Input data. Callback function called for each match found. + // + wordlist_walk_callback_t callback; + // + // Input data. Argument given to callback, contains arbitrary + // caller defined data. + // + Object *callback_data; + + // + // Output parameters + // + // + // Output data. List of WordReference found in the search. + // + List *collectRes; + + //- + // Output data. Last match found. Use GetFound() to retrieve it. + // + WordReference found; + //- + // Output data. WORD_WALK_ATEND if cursor is past last match, + // OK otherwise. Use GetStatus() to retrieve it. + // + int status; + + // + // Debugging section. Do not use unless you know exactly what you do. + // + // + // Collect everything found while searching (not necessarily matching) + // + List *traceRes; + + // + // Internal state + // + // + // The actual Berkeley DB cursor. 
+ // + WordDBCursor cursor; + // + // The latest retrieved key and data + // + String key; + String data; + // + // The shorted prefix key computed from searchKey + // + WordKey prefixKey; + //- + // WalkNext leap is either DB_NEXT or DB_SET_RANGE. + // + int cursor_get_flags; + // + // True if search key is a prefix key + // + int searchKeyIsSameAsPrefix; + //- + // The inverted index used by this cursor. + // + WordList *words; +#endif /* SWIG */ +}; + +#endif /* _WordCursor_h_ */ |