diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordDict.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htword/WordDict.cc | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDict.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDict.cc new file mode 100644 index 00000000..85bac6f5 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htword/WordDict.cc @@ -0,0 +1,274 @@ +// +// WordDict.cc +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: WordDict.cc,v 1.4 2004/05/28 13:15:26 lha Exp $ +// +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include <fcntl.h> + +#include "WordDict.h" +#include "WordListOne.h" + +#define WORD_DICT_CURSOR_FIRST 1 +#define WORD_DICT_CURSOR_NEXT 2 + +class WordDictCursor { +public: + int info; + String prefix; + WordDBCursor* cursor; +}; + +WordDict::~WordDict() +{ + delete db; +} + +int WordDict::Initialize(WordList* nwords) +{ + words = nwords; + db = new WordDB(nwords->GetContext()->GetDBInfo()); + return OK; +} + +int WordDict::Open() +{ + const String& filename = words->Filename(); + int flags = words->Flags(); + + db->set_pagesize(words->Pagesize()); + + return db->Open(filename, "dict", DB_BTREE, flags, 0666, WORD_DB_DICT) == 0 ? OK : NOTOK; +} + +int WordDict::Remove() +{ + return db->Remove(words->Filename(), "dict") == 0 ? OK : NOTOK; +} + +int WordDict::Close() +{ + return db->Close() == 0 ? OK : NOTOK; +} + +int WordDict::Serial(const String& word, unsigned int& serial) +{ + int ret; + WordDictRecord entry; + if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND) + return NOTOK; + if(ret == DB_NOTFOUND) { + words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id); + if(entry.Put(db, word) != 0) return NOTOK; + } + serial = entry.id; + + return OK; +} + +int WordDict::SerialExists(const String& word, unsigned int& serial) +{ + int ret; + WordDictRecord entry; + if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND) + return NOTOK; + + serial = ret == DB_NOTFOUND ? WORD_DICT_SERIAL_INVALID : entry.id; + + return OK; +} + +int WordDict::SerialRef(const String& word, unsigned int& serial) +{ + int ret; + WordDictRecord entry; + if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND) + return NOTOK; + if(ret == DB_NOTFOUND) + words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id); + entry.count++; + if(entry.Put(db, word) != 0) return NOTOK; + serial = entry.id; + + return OK; +} + +int WordDict::Noccurrence(const String& word, unsigned int& noccurrence) const +{ + if(word.empty()) { + fprintf(stderr, "WordDict::Noccurrence: null word\n"); + return NOTOK; + } + WordDictRecord entry; + noccurrence = 0; + int ret; + if((ret = entry.Get(db, word)) != 0) { + if(ret != DB_NOTFOUND) + return NOTOK; + } + noccurrence = entry.count; + + return OK; +} + +int WordDict::Normalize(String& word) const +{ + const WordType& wtype = words->GetContext()->GetType(); + + return wtype.Normalize(word); +} + +int WordDict::Incr(const String& word, unsigned int incr) +{ + int ret; + WordDictRecord entry; + if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND) + return NOTOK; + if(ret == DB_NOTFOUND) + words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id); + entry.count += incr; + if(entry.Put(db, word) != 0) return NOTOK; + return OK; +} + +int WordDict::Decr(const String& word, unsigned int decr) +{ + WordDictRecord entry; + int ret; + if((ret = entry.Get(db, word)) != 0) { + if(ret == DB_NOTFOUND) + fprintf(stderr, "WordDict::Unref(%s) Unref on non existing word occurrence\n", (const char*)word); + return NOTOK; + } + entry.count -= decr; + if(entry.count > 0) + ret = entry.Put(db, word) == 0 ? OK : NOTOK; + else + ret = entry.Del(db, word) == 0 ? OK : NOTOK; + + return ret; +} + +int WordDict::Put(const String& word, unsigned int noccurrence) +{ + int ret; + WordDictRecord entry; + if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND) + return NOTOK; + if(ret == DB_NOTFOUND) + words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id); + entry.count = noccurrence; + if(entry.Put(db, word) != 0) return NOTOK; + return OK; +} + +List *WordDict::Words() const +{ + String key; + String coded; + WordDBCursor* cursor = db->Cursor(); + List* list = new List; + + while(cursor->Get(key, coded, DB_NEXT) == 0) + list->Add(new String(key)); + + delete cursor; + + return list; +} + +int WordDict::Exists(const String& word) const +{ + String tmp_word = word; + String coded; + + return db->Get(0, tmp_word, coded, 0) == 0; +} + +WordDictCursor* WordDict::Cursor() const +{ + WordDictCursor* cursor = new WordDictCursor; + cursor->cursor = db->Cursor(); + + return cursor; +} + +int WordDict::Next(WordDictCursor* cursor, String& word, WordDictRecord& record) +{ + String coded; + int ret = cursor->cursor->Get(word, coded, DB_NEXT); + if(ret != 0) { + delete cursor->cursor; + delete cursor; + } else { + record.Unpack(coded); + } + return ret; +} + +WordDictCursor* WordDict::CursorPrefix(const String& prefix) const +{ + WordDictCursor* cursor = new WordDictCursor; + cursor->cursor = db->Cursor(); + cursor->prefix = prefix; + cursor->info = WORD_DICT_CURSOR_FIRST; + + return cursor; +} + +int WordDict::NextPrefix(WordDictCursor* cursor, String& word, WordDictRecord& record) +{ + String coded; + int ret; + if(cursor->info == WORD_DICT_CURSOR_FIRST) { + word = cursor->prefix; + ret = cursor->cursor->Get(word, coded, DB_SET_RANGE); + cursor->info = WORD_DICT_CURSOR_NEXT; + } else { + ret = cursor->cursor->Get(word, coded, DB_NEXT); + } + // + // Stop walking when 1) DB_NOTFOUND, 2) the word found is shorter than + // the required prefix, 3) the word found does not start with the + // required prefix. + // + if(ret != 0 || + cursor->prefix.length() > word.length() || + strncmp(cursor->prefix.get(), word.get(), cursor->prefix.length())) { + delete cursor->cursor; + delete cursor; + if(ret == 0) ret = DB_NOTFOUND; + } else { + record.Unpack(coded); + } + return ret; +} + +int WordDict::Write(FILE* f) +{ + WordDBCursor* cursor = db->Cursor(); + String key; + String coded; + unsigned int occurrence; + unsigned int id; + + while(cursor->Get(key, coded, DB_NEXT) == 0) { + int offset = 0; + coded.ber_shift(offset, occurrence); + coded.ber_shift(offset, id); + fprintf(f, "%s %d %d\n", (char*)key, id, occurrence); + } + + delete cursor; + + return OK; +} |