summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordCursor.h')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordCursor.h445
1 files changed, 445 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h
new file mode 100644
index 00000000..ba6e9732
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordCursor.h
@@ -0,0 +1,445 @@
+//
+// WordCursor.h
+//
+// NAME
+//
+// search specification and results for WordList.
+//
+// SYNOPSIS
+//
+// #include <WordList.h>
+//
+// int callback(WordList *, WordDBCursor& , const WordReference *, Object &)
+// {
+// ...
+// }
+//
+// Object* data = ...
+//
+// WordList *words = ...;
+//
+// WordCursor *search = words->Cursor(callback, data);
+// WordCursor *search = words->Cursor(WordKey("word <DEF> <UNDEF> <UNDEF>"));
+// WordCursor *search = words->Cursor(WordKey("word <DEF> <UNDEF> <UNDEF>"), callback, data);
+//
+// ...
+//
+// if(search->Walk() == NOTOK) bark;
+// List* results = search->GetResults();
+//
+// if(search->WalkNext() == OK)
+// dosomething(search->GetFound());
+//
+// DESCRIPTION
+//
+// WordCursor is an iterator on an inverted index. It is created by
+// calling the <i>Cursor</i> method of a <i>WordList</i> object. There is
+// no other way to create a WordCursor object.
+// When the <i>Walk*</i> methods return,
+// the WordCursor object contains the result of the search and
+// status information that indicates if it reached the end of
+// the list (IsAtEnd() method).
+//
+// The <b>callback</b> function that is called each time a match is
+// found takes the following arguments:
+// <pre>
+// WordList* words pointer to the inverted index handle.
+// WordDBCursor& cursor to call Del() and delete the current match
+// const WordReference* wordRef is the match
+// Object& data is the user data provided by the caller when
+// search began.
+// </pre>
+//
+// The <i>WordKey</i> object that specifies the search criterion
+// may be used as follows (assuming word is followed by DOCID and
+// LOCATION):
+//
+// Ex1: <b>WordKey("word <DEF> <UNDEF> <UNDEF>")</b> find all occurrences
+// of <i>word</i>.
+//
+// Ex2: <b>WordKey("meet <UNDEF> <UNDEF> <UNDEF>")</b> find all occurrences
+// starting with <i>meet</i>, including <i>meeting</i> etc.
+//
+// Ex3: <b>WordKey("meet <DEF> <UNDEF> 1")</b> find all occurrences of
+// <i>meet</i> that occur at LOCATION 1 in any DOCID. This can
+// be inefficient since the search has to scan all occurrences
+// of <i>meet</i> to find the ones that occur at LOCATION 1.
+//
+// Ex4: <b>WordKey("meet <DEF> 2 <UNDEF>")</b> find all occurrences of
+// <i>meet</i> that occur in DOCID 2, at any location.
+//
+// Interface functions are virtual so that a derivation of the
+// class is possible. Some functions are meant to be used by derived
+// classes such as the <b>Initialize</b> function. All data members
+// should be accessed using the corresponding accessor if possible.
+//
+// END
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordCursor.h,v 1.4 2004/05/28 13:15:26 lha Exp $
+//
+
+#ifndef _WordCursor_h_
+#define _WordCursor_h_
+
+#ifndef SWIG
+#include "htString.h"
+#include "WordKey.h"
+#include "WordDB.h"
+
+class WordList;
+class WordDBCursor;
+#endif /* SWIG */
+//
+// Possible values of the action argument of WordList::Walk and of the
+// WordCursor action member; see the walk function in WordList.cc for details:
+//
+#define HTDIG_WORDLIST_COLLECTOR 0x0001
+#define HTDIG_WORDLIST_WALKER 0x0002
+
+#ifndef SWIG
+//
+// Type of the callback argument in WordCursor: function called for each match found
+//
+typedef int (*wordlist_walk_callback_t)(WordList *, WordDBCursor& , const WordReference *, Object &);
+#endif /* SWIG */
+
+//
+// Possible values of the WordCursor status member
+//
+//
+// WalkNext reached the end of the matches
+//
+#define WORD_WALK_ATEND 0x0001
+//
+// Failed to acquire Berkeley DB cursor
+//
+#define WORD_WALK_CURSOR_FAILED 0x0002
+//
+// Berkeley DB Get operation failed
+//
+#define WORD_WALK_GET_FAILED 0x0004
+//
+// Callback function returned NOTOK
+//
+#define WORD_WALK_CALLBACK_FAILED 0x0008
+//
+// WalkNextStep hit an entry that does not match the
+// searched key.
+//
+#define WORD_WALK_NOMATCH_FAILED 0x0010
+//
+// WordCursor contains undefined data
+//
+#define WORD_WALK_FAILED 0xffffffff
+
+//
+// Possible return values of the IsA() method (WordCursor::IsA returns WORD_CURSOR)
+//
+#define WORD_CURSOR 1
+#define WORD_CURSORS 2
+
+//
+// WordList::Walk uses WordCursor to hold:
+// state information (the database cursor)
+// search term description
+// debug/trace/benchmarking
+// search result format description
+//
+class WordCursor
+{
+ public:
+#ifndef SWIG
+ //
+ // Constructor for internal use: only calls Clear(). The creator of the object
+ // must then call Initialize() prior to using any other methods.
+ //
+ WordCursor() { Clear(); }
+ //-
+ // Constructor for internal use. See WordList::Cursor method with same prototype
+ // for description. Uses an empty WordKey() and the HTDIG_WORDLIST_WALKER action.
+ //
+ WordCursor(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, WordKey(), callback, callback_data, HTDIG_WORDLIST_WALKER); }
+ //-
+ // Constructor for internal use. See WordList::Cursor method with same prototype
+ // for description. No callback is set; action defaults to HTDIG_WORDLIST_WALKER.
+ //
+ WordCursor(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { Clear(); Initialize(words, searchKey, 0, 0, action); }
+ //-
+ // Constructor for internal use. See WordList::Cursor method with same prototype
+ // for description. Uses the HTDIG_WORDLIST_WALKER action.
+ //
+ WordCursor(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER); }
+#endif /* SWIG */
+ virtual ~WordCursor() {}
+ //-
+ // Clear all data in object, set <b>GetResults()</b> data to NULL but
+ // do not delete it (the application is responsible for that).
+ //
+ virtual void Clear();
+ virtual void ClearInternal();
+ virtual void ClearResult();
+
+ //-
+ // Returns the type of the object. May be overridden by
+ // derived classes to differentiate them at runtime.
+ // Returns WORD_CURSOR.
+ //
+ virtual int IsA() const { return WORD_CURSOR; }
+
+ //-
+ // Returns true if WalkNext() steps through entries in strictly increasing
+ // order, false if it steps through them in random order. Returns 1 here.
+ //
+ virtual int Ordered() const { return 1; }
+
+ //-
+ // Optimize the cursor before starting a Walk.
+ // Returns OK on success, NOTOK otherwise (this version always returns OK).
+ //
+ virtual int Optimize() { return OK; }
+
+ //-
+ // Save in <b>buffer</b> all the information necessary to resume
+ // the walk at the point it left. The ASCII representation of the
+ // last entry found (GetFound()) is written in <b>buffer</b> by calling
+ // found.Get(buffer) (found is a WordReference). Always returns OK.
+ //
+ virtual int ContextSave(String& buffer) const { found.Get(buffer); return OK; }
+ //-
+ // Restore from buffer all the information necessary to
+ // resume the walk at the point it left. The <b>buffer</b> is expected
+ // to contain an ASCII representation of a WordKey (see WordKey::Set
+ // method). A <b>Seek</b> is done on the key and the object is prepared
+ // to jump to the next occurrence when <b>WalkNext</b> is called (the
+ // cursor_get_flags is set to <i>DB_NEXT</i>).
+ //
+ virtual int ContextRestore(const String& buffer);
+
+#ifndef SWIG
+ //-
+ // Walk and collect data from the index.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int Walk();
+#endif /* SWIG */
+ //-
+ // Must be called before other Walk methods are used.
+ // Fill internal state according to input parameters
+ // and move before the first matching entry.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int WalkInit();
+ //-
+ // Move before the first matching entry of the index.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int WalkRewind();
+ //-
+ // Move to the next matching entry.
+ // At end of list, WORD_WALK_ATEND is returned.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int WalkNext();
+#ifndef SWIG
+ //-
+ // Advance the cursor one step. The entry pointed to by the cursor may
+ // or may not match the requirements. Returns OK if the entry pointed to
+ // by the cursor matches the requirements. Returns NOTOK on
+ // failure. Returns WORD_WALK_NOMATCH_FAILED if the current entry
+ // does not match the requirements; it is then safe to call WalkNextStep
+ // again until either OK or NOTOK is returned.
+ //
+ virtual int WalkNextStep();
+#endif /* SWIG */
+ //-
+ // Terminate Walk, free allocated resources.
+ // Returns OK on success, NOTOK otherwise.
+ //
+ virtual int WalkFinish();
+ //
+ // Find out if the cursor should jump to the next possible key
+ // (DB_SET_RANGE) instead of iterating sequentially (DB_NEXT). If it
+ // is decided that a jump is the better move, the cursor flags are set to
+ // DB_SET_RANGE and the key to the calculated next possible key; else nothing is done.
+ // Returns OK if skipping was successful. Returns WORD_WALK_ATEND if no
+ // more possible match, reached the maximum. Returns
+ // WORD_WALK_FAILED on general failure, occurs if called and no
+ // skipping necessary.
+ //
+ int SkipUselessSequentialWalking();
+
+ //-
+ // Move before the inverted index position specified in <b>patch.</b>
+ // May only be called after a successful call to the <i>WalkNext</i>
+ // or <i>WalkNextStep</i> method.
+ // Copy defined fields from <b>patch</b> into a copy of the
+ // <i>found</i> data member and
+ // initialize internal state so that <i>WalkNext</i> jumps to
+ // this key next time it's called (cursor_get_flags set to DB_SET_RANGE).
+ // Returns OK if successful, NOTOK otherwise.
+ //
+ virtual int Seek(const WordKey& patch);
+
+ //-
+ // Returns true if cursor is positioned after the last possible
+ // match (status equals WORD_WALK_ATEND), false otherwise.
+ //
+ virtual int IsAtEnd() const { return status == WORD_WALK_ATEND; }
+
+ //
+ // Accessors for input parameters
+ //
+ //-
+ // Returns the search criterion.
+ //
+ WordKey& GetSearch() { return searchKey; }
+#ifndef SWIG
+ const WordKey& GetSearch() const { return searchKey; }
+#endif /* SWIG */
+ //-
+ // Returns the type of action taken when a matching entry
+ // is found (see the action data member).
+ //
+ int GetAction() const { return action; }
+ //
+ // Accessors for output parameters
+ //
+ //-
+ // Returns the list of WordReference found. The application
+ // is responsible for deallocation of the list.
+ //
+ List *GetResults() { return collectRes; }
+ //-
+ // For debugging purposes. Returns the list of WordReference hit
+ // during the search
+ // process. Some of them match the searched key, some don't.
+ // The application is responsible for deallocation of the list.
+ //
+ List *GetTraces() { return traceRes; }
+ //-
+ // For debugging purposes. Set the list that receives the WordReference
+ // hit during the search process.
+ //
+ void SetTraces(List* traceRes_arg) { traceRes = traceRes_arg; }
+ //-
+ // Returns the last entry hit by the search. Only contains
+ // a valid value if the last <i>WalkNext</i> or <i>WalkNextStep</i>
+ // call was successful (i.e. returned OK).
+ //
+ const WordReference& GetFound() { return found; }
+ //-
+ // Returns the number of occurrences of the searched word
+ // in the inverted index in the <b>noccurrence</b> parameter.
+ // Returns OK on success, NOTOK on failure.
+ //
+ virtual int Noccurrence(unsigned int& noccurrence) const;
+
+#ifndef SWIG
+ //-
+ // Convert the whole structure to an ASCII string description.
+ // Returns OK if successful, NOTOK otherwise.
+ //
+ virtual int Get(String& bufferout) const;
+ String Get() const { String tmp; Get(tmp); return tmp; }
+
+ protected:
+
+ //-
+ // Protected method. Derived classes should use this function to initialize
+ // the object if they do not call a WordCursor constructor in their own
+ // constructor. Initialization may occur after the object is created
+ // and must occur before a <b>Walk*</b> method is called. See the
+ // DESCRIPTION section for the semantics of the arguments.
+ // Return OK on success, NOTOK on error.
+ //
+ int Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object * ncallback_data, int naction);
+
+ //
+ // Input parameters
+ //
+ //-
+ // Input data. The key to be searched, see DESCRIPTION for more information.
+ //
+ WordKey searchKey;
+ //
+ // Input data. What to do when a WordReference is found.
+ // Can either be
+ // HTDIG_WORDLIST_COLLECTOR WordReference found stored in collectRes
+ // HTDIG_WORDLIST_WALKER callback is called for each WordReference found
+ //
+ int action;
+
+ //
+ // Input data. Callback function called for each match found.
+ //
+ wordlist_walk_callback_t callback;
+ //
+ // Input data. Argument given to callback, contains arbitrary
+ // caller defined data.
+ //
+ Object *callback_data;
+
+ //
+ // Output parameters
+ //
+ //
+ // Output data. List of WordReference found in the search (see GetResults()).
+ //
+ List *collectRes;
+
+ //-
+ // Output data. Last match found. Use GetFound() to retrieve it.
+ //
+ WordReference found;
+ //-
+ // Output data. WORD_WALK_ATEND if cursor is past last match,
+ // OK otherwise. IsAtEnd() compares it with WORD_WALK_ATEND.
+ //
+ int status;
+
+ //
+ // Debugging section. Do not use unless you know exactly what you do.
+ //
+ //
+ // Collect everything found while searching (not necessarily matching)
+ //
+ List *traceRes;
+
+ //
+ // Internal state
+ //
+ //
+ // The actual Berkeley DB cursor.
+ //
+ WordDBCursor cursor;
+ //
+ // The latest retrieved key and data
+ //
+ String key;
+ String data;
+ //
+ // The shorted prefix key computed from searchKey
+ //
+ WordKey prefixKey;
+ //-
+ // WalkNext leap is either DB_NEXT or DB_SET_RANGE.
+ //
+ int cursor_get_flags;
+ //
+ // True if search key is a prefix key
+ //
+ int searchKeyIsSameAsPrefix;
+ //-
+ // The inverted index used by this cursor.
+ //
+ WordList *words;
+#endif /* SWIG */
+};
+
+#endif /* _WordCursor_h_ */