// // WordDBCache.h // // NAME // intermediate cache for WordList objects. // // SYNOPSIS // // Internal helper for the WordListOne object. // // DESCRIPTION // // To speed up bulk insertions, the WordDBCache allows them to remain in // memory as long as a given limit is not reached. The inserted entries // are then sorted and dumped into a file. When a given number of files // have been produced, they are merged into one. Eventually the resulting // list of entries is inserted into the WordList index. // // // END // // Part of the ht://Dig package // Copyright (c) 1999-2004 The ht://Dig Group // For copyright details, see the file COPYING in your distribution // or the GNU Library General Public License (LGPL) version 2 or later // // // $Id: WordDBCache.h,v 1.4 2004/05/28 13:15:26 lha Exp $ // #ifndef _WordDBCache_h_ #define _WordDBCache_h_ #include #include #include "htString.h" #include "List.h" #include "db.h" #include "lib.h" #include "myqsort.h" #include "WordList.h" class WordDB; class WordLock; // // Minimum size of the pulsing cache // #define WORD_DB_CACHE_MINIMUM (500 * 1024) // // We could use DBT instead but it's more than two times bigger and // saving time by using memory as efficiently as possible is the whole // point of the cache. 
// class WordDBCacheEntry { public: char* key; unsigned int key_size; char* data; unsigned int data_size; }; class WordDBCache { public: inline WordDBCache(WordContext* ncontext) { context = ncontext; entries = (WordDBCacheEntry*)malloc(1000 * sizeof(WordDBCacheEntry)); entries_length = 0; entries_size = 1000; pool = (char*)malloc(WORD_DB_CACHE_MINIMUM); pool_length = 0; pool_size = pool_max = WORD_DB_CACHE_MINIMUM; } inline ~WordDBCache() { if(pool_length > 0) { fprintf(stderr, "WordDBCache::~WordDBCache: destructor called and cache not empty\n"); } free(entries); free(pool); } inline int ResizeEntries() { entries_size *= 2; entries = (WordDBCacheEntry*)realloc(entries, entries_size * sizeof(WordDBCacheEntry)); return entries ? 0 : DB_RUNRECOVERY; } inline int ResizePool(int wanted) { if(pool_size * 2 > pool_max) { if(pool_max > pool_size && pool_max > wanted) pool_size = pool_max; else return ENOMEM; } else { pool_size *= 2; } pool = (char*)realloc(pool, pool_size); return pool ? 0 : DB_RUNRECOVERY; } inline int Allocate(int size) { int ret; if(entries_length >= entries_size) if((ret = ResizeEntries()) != 0) return ret; if(pool_length + size >= pool_size) { if((ret = ResizePool(pool_length + size)) != 0) return ret; } return 0; } inline int GetMax() const { return pool_max; } inline int SetMax(int max) { if(max > pool_max) pool_max = max; return 0; } inline int SetCompare(int (*ncompare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *)) { compare = ncompare; return 0; } inline int Sort() { if(Absolute() != OK) return NOTOK; // // Reorder entries in increasing order // myqsort((void*)entries, entries_length, sizeof(WordDBCacheEntry), (myqsort_cmp)compare, (void*)context); return 0; } inline int Relative() { int i; for(i = 0; i < entries_length; i++) { entries[i].key = (char*)(entries[i].key - pool); entries[i].data = (char*)(entries[i].data - pool); } return OK; } inline int Absolute() { int i; for(i = 0; i < entries_length; i++) { entries[i].key 
= pool + (int)(entries[i].key); entries[i].data = pool + (int)(entries[i].data); } return OK; } inline int Entries(WordDBCacheEntry*& nentries, int& nentries_length) { nentries = entries; nentries_length = entries_length; return 0; } inline int Pool(char*& npool, int& npool_length) { npool = pool; npool_length = pool_length; return OK; } inline int Add(char* key, int key_size, char* data, int data_size) { int ret; if((ret = Allocate(key_size + data_size)) != 0) return ret; entries[entries_length].key = (char*)pool_length; entries[entries_length].key_size = key_size; entries[entries_length].data = (char*)(pool_length + key_size); entries[entries_length].data_size = data_size; entries_length++; memcpy(pool + pool_length, key, key_size); memcpy(pool + pool_length + key_size, data, data_size); pool_length += key_size + data_size; return 0; } inline int Flush() { entries_length = 0; pool_length = 0; return 0; } inline int Empty() { return entries_length <= 0; } private: WordDBCacheEntry* entries; int entries_length; int entries_size; char* pool; int pool_length; int pool_size; int pool_max; int (*compare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *); WordContext *context; }; class WordDBCacheFile : public Object { public: WordDBCacheFile() { size = 0; } String filename; unsigned int size; }; class WordDBCaches { public: inline WordDBCaches(WordList* nwords, int nfile_max, int size_hint, int nsize_max) : cache(nwords->GetContext()) { words = nwords; files = new WordDB(words->GetContext()->GetDBInfo()); files->Open(words->Filename(), "tmp", DB_BTREE, words->Flags(), 0666, WORD_DB_FILES); file_max = nfile_max; size_max = nsize_max; lock = 0; cache.SetMax(size_hint / 2); } ~WordDBCaches() { delete files; } int Full() const { return size_max > 0 ? 
size >= size_max : 0; } int Add(char* key, int key_size, char* data, int data_size); int AddFile(String& filename); int CacheFlush(); int Merge(); int Merge(const String& filea, const String& fileb, const String& tmpname); int Merge(WordDB& db); int CacheWrite(const String& filename); int CacheCompare(int (*compare)(WordContext *, const WordDBCacheEntry *, const WordDBCacheEntry *)) { cache.SetCompare(compare); return OK; } int WriteEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size); int ReadEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size); private: WordList* words; WordDB* files; int file_max; int size_max; int size; WordLock* lock; WordDBCache cache; }; #endif /* _WordDBCache_h */