diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordDict.h')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htword/WordDict.h | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDict.h b/debian/htdig/htdig-3.2.0b6/htword/WordDict.h new file mode 100644 index 00000000..86b45717 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htword/WordDict.h @@ -0,0 +1,252 @@ +// +// WordDict.h +// +// NAME +// +// manage and use an inverted index dictionary. +// +// SYNOPSIS +// +// #include <mifluz.h> +// +// WordList* words = ...; +// WordDict* dict = words->Dict(); +// +// DESCRIPTION +// +// WordDict maps strings to unique identifiers and frequency in the +// inverted index. Whenever a new word is found, the WordDict class +// can be asked to assign it a serial number. When doing so, an entry +// is created in the dictionary with a frequency of zero. The application +// may then increment or decrement the frequency to reflect the inverted +// index content. +// +// The serial numbers range from 1 to 2^32 inclusive. +// +// A WordDict object is automatically created by the WordList object and +// should not be created directly by the application. +// +// END +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: WordDict.h,v 1.4 2004/05/28 13:15:26 lha Exp $ +// + +#ifndef _WordDict_h_ +#define _WordDict_h_ + +#include <stdio.h> + +#ifndef SWIG +#include "htString.h" +#include "WordDB.h" + +class WordList; +class WordDictCursor; + +#define WORD_DICT_SERIAL_INVALID 0 + +class WordDictRecord { + public: + inline WordDictRecord() { count = 0; id = WORD_DICT_SERIAL_INVALID; } + + inline int Unpack(const String& coded) { + int offset = 0; + coded.ber_shift(offset, count); + coded.ber_shift(offset, id); + return OK; + } + + inline int Pack(String& coded) const { + int offset = 0; + coded.ber_push(offset, count); + coded.ber_push(offset, id); + return OK; + } + + inline int Get(WordDB* db, const String& word) { + String tmp_word = word; + String coded(BER_MAX_BYTES * 2); + int ret; + if((ret = db->Get(0, tmp_word, coded, 0)) != 0) return ret; + + Unpack(coded); + + return ret; + } + + inline int Put(WordDB* db, const String& word) { + String coded(BER_MAX_BYTES * 2); + Pack(coded); + return db->Put(0, word, coded, 0); + } + + inline int Del(WordDB* db, const String& word) { + return db->Del(0, word); + } + + inline unsigned int Count() { return count; } + inline unsigned int Id() { return id; } + + unsigned int count; + unsigned int id; +}; +#endif /* SWIG */ + +class WordDict +{ + public: +#ifndef SWIG + //- + // Private constructor. + // + WordDict() { words = 0; db = 0; } + ~WordDict(); + + //- + // Bind the object a WordList inverted index. Return OK on success, + // NOTOK otherwise. + // + int Initialize(WordList* words); + + //- + // Open the underlying Berkeley DB sub-database. The enclosing + // file is given by the <i>words</i> data member. Return OK on success, + // NOTOK otherwise. + // + int Open(); + //- + // Destroy the underlying Berkeley DB sub-database. Return OK on success, + // NOTOK otherwise. + // + int Remove(); + //- + // Close the underlying Berkeley DB sub-database. Return OK on success, + // NOTOK otherwise. + // + int Close(); + + //- + // If the <b>word</b> argument exists in the dictionnary, return its + // serial number in the <b>serial</b> argument. If it does not already + // exists, assign it a serial number, create an entry with a frequency + // of zero and return the new serial in the <b>serial</b> argument. + // Return OK on success, NOTOK otherwise. + // + int Serial(const String& word, unsigned int& serial); + //- + // If the <b>word</b> argument exists in the dictionnary, return its + // serial number in the <b>serial</b> argument. If it does not exists + // set the <b>serial</b> argument to WORD_DICT_SERIAL_INVALID. + // Return OK on success, NOTOK otherwise. + // + int SerialExists(const String& word, unsigned int& serial); + //- + // Short hand for Serial() followed by Ref(). + // Return OK on success, NOTOK otherwise. + // + int SerialRef(const String& word, unsigned int& serial); + //- + // Return the frequency of the <b>word</b> argument + // in the <b>noccurrence</b> argument. + // Return OK on success, NOTOK otherwise. + // + int Noccurrence(const String& word, unsigned int& noccurrence) const; +#endif /* SWIG */ + + //- + // Short hand for words->GetContext()->GetType()->Normalize(word). + // Return OK on success, NOTOK otherwise. + // + int Normalize(String& word) const; + + //- + // Short hand for Incr(word, 1) + // + int Ref(const String& word) { return Incr(word, 1); } + //- + // Add <b>incr</b> to the frequency of the <b>word</b>. + // Return OK on success, NOTOK otherwise. + // + int Incr(const String& word, unsigned int incr); + //- + // Short hand for Decr(word, 1) + // + int Unref(const String& word) { return Decr(word, 1); } + //- + // Subtract <b>decr</b> to the frequency of the <b>word</b>. If + // the frequency becomes lower or equal to zero, remove the entry + // from the dictionnary and lose the association between the word and its + // serial number. + // Return OK on success, NOTOK otherwise. + // + int Decr(const String& word, unsigned int decr); + //- + // Set the frequency of <b>word</b> with the value of the <b>noccurrence</b> + // argument. + // + int Put(const String& word, unsigned int noccurrence); + + //- + // Return true if <b>word</b> exists in the dictionnary, false otherwise. + // + int Exists(const String& word) const; + +#ifndef SWIG + //- + // Return a pointer to the associated WordList object. + // + List* Words() const; + + //- + // Return a cursor to sequentially walk the dictionnary using the + // <b>Next</b> method. + // + WordDictCursor* Cursor() const; + //- + // Return the next entry in the dictionnary. The <b>cursor</b> argument + // must have been created using the <i>Cursor</i> method. The word is + // returned in the <b>word</b> argument and the record is returned in + // the <b>record</b> argument. + // On success the function returns 0, at the end of the dictionnary it + // returns DB_NOTFOUND. The <b>cursor</b> argument is deallocated when + // the function hits the end of the dictionnary or an error occurs. + // + int Next(WordDictCursor* cursor, String& word, WordDictRecord& record); + + //- + // Return a cursor to sequentially walk the entries of the dictionnary + // that start with the <b>prefix</b> argument, using the + // <b>NextPrefix</b> method. + // + WordDictCursor* CursorPrefix(const String& prefix) const; + //- + // Return the next prefix from the dictionnary. The <b>cursor</b> argument + // must have been created using the <i>CursorPrefix</i> method. The word is + // returned in the <b>word</b> argument and the record is returned in + // the <b>record</b> argument. The <b>word</b> is guaranteed to start with + // the prefix specified to the <b>CursorPrefix</b> method. + // On success the function returns 0, at the end of the dictionnary it + // returns DB_NOTFOUND. The <b>cursor</b> argument is deallocated when + // the function hits the end of the dictionnary or an error occurs. + // + int NextPrefix(WordDictCursor* cursor, String& word, WordDictRecord& record); + + //- + // Dump the complete dictionary in the file descriptor <b>f.</b> The + // format of the dictionary is <i>word serial frequency</i>, one by + // line. + // + int Write(FILE* f); + + private: + WordList* words; + WordDB* db; +#endif /* SWIG */ +}; +#endif /* _WordDict_h_ */ |