/* This file is part of the KDE libraries * Copyright (C) 1999 Waldo Bastian <bastian@kde.org> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License version 2 as published by the Free Software Foundation; * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public License * along with this library; see the file COPYING.LIB. If not, write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. **/ #include "tdesycocadict.h" #include "tdesycocaentry.h" #include "tdesycoca.h" #include <tqptrlist.h> #include <tqvaluelist.h> #include <kdebug.h> #include <stdlib.h> namespace { struct string_entry { string_entry(TQString _key, KSycocaEntry *_payload) { keyStr = _key; key = keyStr.unicode(); length = keyStr.length(); payload = _payload; hash = 0; } uint hash; int length; const TQChar *key; TQString keyStr; KSycocaEntry *payload; }; } template class TQPtrList<string_entry>; class KSycocaDictStringList : public TQPtrList<string_entry> { public: KSycocaDictStringList(); }; KSycocaDictStringList::KSycocaDictStringList() { setAutoDelete(true); } KSycocaDict::KSycocaDict() : d(0), mStr(0), mOffset(0) { } KSycocaDict::KSycocaDict(TQDataStream *str, int offset) : d(0), mStr(str), mOffset(offset) { TQ_UINT32 test1, test2; str->device()->at(offset); (*str) >> test1 >> test2; if ((test1 > 0x000fffff) || (test2 > 1024)) { KSycoca::flagError(); mHashTableSize = 0; mOffset = 0; return; } str->device()->at(offset); (*str) >> mHashTableSize; (*str) >> mHashList; mOffset = str->device()->at(); // Start of hashtable } KSycocaDict::~KSycocaDict() { delete d; } void KSycocaDict::add(const TQString &key, KSycocaEntry *payload) { if (key.isEmpty()) return; // Not allowed (should never happen) if (!payload) return; // Not allowed! if (!d) { d = new KSycocaDictStringList(); } string_entry *entry= new string_entry(key, payload); d->append(entry); } void KSycocaDict::remove(const TQString &key) { if (d) { for(string_entry *entry=d->first(); entry; entry = d->next()) { if (entry->keyStr == key) { d->remove(); break; } } } } int KSycocaDict::find_string(const TQString &key ) { //kdDebug(7011) << TQString("KSycocaDict::find_string(%1)").arg(key) << endl; if (!mStr || !mOffset) { kdError(7011) << "No database available!" << endl; return 0; } if (mHashTableSize == 0) return 0; // Unlikely to find anything :-] // Read hash-table data uint hash = hashKey(key) % mHashTableSize; //kdDebug(7011) << TQString("hash is %1").arg(hash) << endl; uint off = mOffset+sizeof(TQ_INT32)*hash; //kdDebug(7011) << TQString("off is %1").arg(off,8,16) << endl; mStr->device()->at( off ); TQ_INT32 offset; (*mStr) >> offset; //kdDebug(7011) << TQString("offset is %1").arg(offset,8,16) << endl; if (offset == 0) return 0; if (offset > 0) return offset; // Positive ID // Lookup duplicate list. offset = -offset; mStr->device()->at(offset); //kdDebug(7011) << TQString("Looking up duplicate list at %1").arg(offset,8,16) << endl; while(true) { (*mStr) >> offset; if (offset == 0) break; TQString dupkey; (*mStr) >> dupkey; //kdDebug(7011) << TQString(">> %1 %2").arg(offset,8,16).arg(dupkey) << endl; if (dupkey == key) return offset; } //kdWarning(7011) << "Not found!" << endl; return 0; } uint KSycocaDict::count() { if (!d) return 0; return d->count(); } void KSycocaDict::clear() { delete d; d = 0; } uint KSycocaDict::hashKey( const TQString &key) { int l = key.length(); uint h = 0; for(uint i = 0; i < mHashList.count(); i++) { int pos = mHashList[i]; if (pos < 0) { pos = -pos-1; if (pos < l) h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff; } else { pos = pos-1; if (pos < l) h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff; } } return h; } // // Calculate the diversity of the strings at position 'pos' static int calcDiversity(KSycocaDictStringList *d, int pos, int sz) { if (pos == 0) return 0; bool *matrix = (bool *) calloc(sz, sizeof(bool)); uint usz = sz; if (pos < 0) { pos = -pos-1; for(string_entry *entry=d->first(); entry; entry = d->next()) { int l = entry->length; if (pos < l && pos != 0) { uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff; matrix[ hash % usz ] = true; } } } else { pos = pos-1; for(string_entry *entry=d->first(); entry; entry = d->next()) { if (pos < entry->length) { uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff; matrix[ hash % usz ] = true; } } } int diversity = 0; for(int i=0;i< sz;i++) if (matrix[i]) diversity++; free((void *) matrix); return diversity; } // // Add the diversity of the strings at position 'pos' static void addDiversity(KSycocaDictStringList *d, int pos) { if (pos == 0) return; if (pos < 0) { pos = -pos-1; for(string_entry *entry=d->first(); entry; entry = d->next()) { int l = entry->length; if (pos < l) entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff; } } else { pos = pos - 1; for(string_entry *entry=d->first(); entry; entry = d->next()) { if (pos < entry->length) entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff; } } } void KSycocaDict::save(TQDataStream &str) { if (count() == 0) { mHashTableSize = 0; mHashList.clear(); str << mHashTableSize; str << mHashList; return; } mOffset = str.device()->at(); //kdDebug(7011) << TQString("KSycocaDict: %1 entries.").arg(count()) << endl; //kdDebug(7011) << "Calculating hash keys.." << endl; int maxLength = 0; //kdDebug(7011) << "Finding maximum string length" << endl; for(string_entry *entry=d->first(); entry; entry = d->next()) { entry->hash = 0; if (entry->length > maxLength) maxLength = entry->length; } //kdDebug(7011) << TQString("Max string length = %1").arg(maxLength) << endl; // use "almost prime" number for sz (to calculate diversity) and later // for the table size of big tables // int sz = d->count()*5-1; unsigned int sz = count()*4 + 1; while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13)))) sz+=2; int maxDiv = 0; int maxPos = 0; int lastDiv = 0; mHashList.clear(); // try to limit diversity scan by "predicting" positions // with high diversity int *oldvec=new int[maxLength*2+1]; for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0; int mindiv=0; while(true) { int divsum=0,divnum=0; maxDiv = 0; maxPos = 0; for(int pos=-maxLength; pos <= maxLength; pos++) { // cut off if (oldvec[pos+maxLength]<mindiv) { oldvec[pos+maxLength]=0; continue; } int diversity = calcDiversity(d, pos, sz); if (diversity > maxDiv) { maxDiv = diversity; maxPos = pos; } oldvec[pos+maxLength]=diversity; divsum+=diversity; divnum++; } // arbitrary cut-off value 3/4 of average seems to work if (divnum) mindiv=(3*divsum)/(4*divnum); if (maxDiv <= lastDiv) break; // tqWarning("Max Div = %d at pos %d", maxDiv, maxPos); lastDiv = maxDiv; addDiversity(d, maxPos); mHashList.append(maxPos); } delete [] oldvec; for(string_entry *entry=d->first(); entry; entry = d->next()) { entry->hash = hashKey(entry->keyStr); } // fprintf(stderr, "Calculating minimum table size..\n"); mHashTableSize = sz; struct hashtable_entry { string_entry *entry; TQPtrList<string_entry> *duplicates; int duplicate_offset; }; hashtable_entry *hashTable = new hashtable_entry[ sz ]; //kdDebug(7011) << "Clearing hashtable..." << endl; for (unsigned int i=0; i < sz; i++) { hashTable[i].entry = 0; hashTable[i].duplicates = 0; } //kdDebug(7011) << "Filling hashtable..." << endl; for(string_entry *entry=d->first(); entry; entry = d->next()) { int hash = entry->hash % sz; if (!hashTable[hash].entry) { // First entry hashTable[hash].entry = entry; } else { if (!hashTable[hash].duplicates) { // Second entry, build duplicate list. hashTable[hash].duplicates = new TQPtrList<string_entry>(); hashTable[hash].duplicates->append(hashTable[hash].entry); hashTable[hash].duplicate_offset = 0; } hashTable[hash].duplicates->append(entry); } } str << mHashTableSize; str << mHashList; mOffset = str.device()->at(); // mOffset points to start of hashTable //kdDebug(7011) << TQString("Start of Hash Table, offset = %1").arg(mOffset,8,16) << endl; for(int pass = 1; pass <= 2; pass++) { str.device()->at(mOffset); //kdDebug(7011) << TQString("Writing hash table (pass #%1)").arg(pass) << endl; for(uint i=0; i < mHashTableSize; i++) { TQ_INT32 tmpid; if (!hashTable[i].entry) tmpid = (TQ_INT32) 0; else if (!hashTable[i].duplicates) tmpid = (TQ_INT32) hashTable[i].entry->payload->offset(); // Positive ID else tmpid = (TQ_INT32) -hashTable[i].duplicate_offset; // Negative ID str << tmpid; //kdDebug(7011) << TQString("Hash table : %1").arg(tmpid,8,16) << endl; } //kdDebug(7011) << TQString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16) << endl; //kdDebug(7011) << TQString("Writing duplicate lists (pass #%1)").arg(pass) << endl; for(uint i=0; i < mHashTableSize; i++) { if (hashTable[i].duplicates) { TQPtrList<string_entry> *dups = hashTable[i].duplicates; hashTable[i].duplicate_offset = str.device()->at(); /*kdDebug(7011) << TQString("Duplicate lists: Offset = %1 list_size = %2") .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count()) << endl; */ for(string_entry *dup = dups->first(); dup; dup=dups->next()) { str << (TQ_INT32) dup->payload->offset(); // Positive ID str << dup->keyStr; // Key (TQString) } str << (TQ_INT32) 0; // End of list marker (0) } } //kdDebug(7011) << TQString("End of Dict, offset = %1").arg(str.device()->at(),8,16) << endl; } //kdDebug(7011) << "Cleaning up hash table." << endl; for(uint i=0; i < mHashTableSize; i++) { delete hashTable[i].duplicates; } delete [] hashTable; }