summaryrefslogtreecommitdiffstats
path: root/tdecore/tdesycocadict.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tdecore/tdesycocadict.cpp')
-rw-r--r--tdecore/tdesycocadict.cpp454
1 files changed, 454 insertions, 0 deletions
diff --git a/tdecore/tdesycocadict.cpp b/tdecore/tdesycocadict.cpp
new file mode 100644
index 000000000..702c163a7
--- /dev/null
+++ b/tdecore/tdesycocadict.cpp
@@ -0,0 +1,454 @@
+/* This file is part of the KDE libraries
+ * Copyright (C) 1999 Waldo Bastian <[email protected]>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License version 2 as published by the Free Software Foundation;
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ **/
+
+#include "tdesycocadict.h"
+#include "tdesycocaentry.h"
+#include "tdesycoca.h"
+
+#include <tqptrlist.h>
+#include <tqvaluelist.h>
+#include <kdebug.h>
+#include <stdlib.h>
+
+namespace
+{
+struct string_entry {
+ string_entry(TQString _key, KSycocaEntry *_payload)
+ { keyStr = _key; key = keyStr.unicode(); length = keyStr.length(); payload = _payload; hash = 0; }
+ uint hash;
+ int length;
+ const TQChar *key;
+ TQString keyStr;
+ KSycocaEntry *payload;
+};
+}
+
+template class TQPtrList<string_entry>;
+
+class KSycocaDictStringList : public TQPtrList<string_entry>
+{
+public:
+ KSycocaDictStringList();
+};
+
+KSycocaDictStringList::KSycocaDictStringList()
+{
+ setAutoDelete(true);
+}
+
+KSycocaDict::KSycocaDict()
+ : d(0), mStr(0), mOffset(0)
+{
+}
+
+KSycocaDict::KSycocaDict(TQDataStream *str, int offset)
+ : d(0), mStr(str), mOffset(offset)
+{
+ TQ_UINT32 test1, test2;
+ str->device()->at(offset);
+ (*str) >> test1 >> test2;
+ if ((test1 > 0x000fffff) || (test2 > 1024))
+ {
+ KSycoca::flagError();
+ mHashTableSize = 0;
+ mOffset = 0;
+ return;
+ }
+
+ str->device()->at(offset);
+ (*str) >> mHashTableSize;
+ (*str) >> mHashList;
+ mOffset = str->device()->at(); // Start of hashtable
+}
+
+KSycocaDict::~KSycocaDict()
+{
+ delete d;
+}
+
+void
+KSycocaDict::add(const TQString &key, KSycocaEntry *payload)
+{
+ if (key.isEmpty()) return; // Not allowed (should never happen)
+ if (!payload) return; // Not allowed!
+ if (!d)
+ {
+ d = new KSycocaDictStringList();
+ }
+
+ string_entry *entry= new string_entry(key, payload);
+ d->append(entry);
+}
+
+void
+KSycocaDict::remove(const TQString &key)
+{
+ if (d)
+ {
+ for(string_entry *entry=d->first(); entry; entry = d->next())
+ {
+ if (entry->keyStr == key)
+ {
+ d->remove();
+ break;
+ }
+ }
+ }
+}
+
+int
+KSycocaDict::find_string(const TQString &key )
+{
+ //kdDebug(7011) << TQString("KSycocaDict::find_string(%1)").arg(key) << endl;
+
+ if (!mStr || !mOffset)
+ {
+ kdError(7011) << "No database available!" << endl;
+ return 0;
+ }
+
+ if (mHashTableSize == 0)
+ return 0; // Unlikely to find anything :-]
+
+ // Read hash-table data
+ uint hash = hashKey(key) % mHashTableSize;
+ //kdDebug(7011) << TQString("hash is %1").arg(hash) << endl;
+
+ uint off = mOffset+sizeof(TQ_INT32)*hash;
+ //kdDebug(7011) << TQString("off is %1").arg(off,8,16) << endl;
+ mStr->device()->at( off );
+
+ TQ_INT32 offset;
+ (*mStr) >> offset;
+
+ //kdDebug(7011) << TQString("offset is %1").arg(offset,8,16) << endl;
+ if (offset == 0)
+ return 0;
+
+ if (offset > 0)
+ return offset; // Positive ID
+
+ // Lookup duplicate list.
+ offset = -offset;
+
+ mStr->device()->at(offset);
+ //kdDebug(7011) << TQString("Looking up duplicate list at %1").arg(offset,8,16) << endl;
+
+ while(true)
+ {
+ (*mStr) >> offset;
+ if (offset == 0) break;
+ TQString dupkey;
+ (*mStr) >> dupkey;
+ //kdDebug(7011) << TQString(">> %1 %2").arg(offset,8,16).arg(dupkey) << endl;
+ if (dupkey == key) return offset;
+ }
+ //kdWarning(7011) << "Not found!" << endl;
+
+ return 0;
+}
+
+uint
+KSycocaDict::count()
+{
+ if (!d) return 0;
+
+ return d->count();
+}
+
+void
+KSycocaDict::clear()
+{
+ delete d;
+ d = 0;
+}
+
+uint
+KSycocaDict::hashKey( const TQString &key)
+{
+ int l = key.length();
+ register uint h = 0;
+
+ for(uint i = 0; i < mHashList.count(); i++)
+ {
+ int pos = mHashList[i];
+ if (pos < 0)
+ {
+ pos = -pos-1;
+ if (pos < l)
+ h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff;
+ }
+ else
+ {
+ pos = pos-1;
+ if (pos < l)
+ h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
+ }
+ }
+ return h;
+}
+
+//
+// Calculate the diversity of the strings at position 'pos'
+static int
+calcDiversity(KSycocaDictStringList *d, int pos, int sz)
+{
+ if (pos == 0) return 0;
+ bool *matrix = (bool *) calloc(sz, sizeof(bool));
+ uint usz = sz;
+
+ if (pos < 0)
+ {
+ pos = -pos-1;
+ for(string_entry *entry=d->first(); entry; entry = d->next())
+ {
+ register int l = entry->length;
+ if (pos < l && pos != 0)
+ {
+ register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff;
+ matrix[ hash % usz ] = true;
+ }
+ }
+ }
+ else
+ {
+ pos = pos-1;
+ for(string_entry *entry=d->first(); entry; entry = d->next())
+ {
+ if (pos < entry->length)
+ {
+ register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
+ matrix[ hash % usz ] = true;
+ }
+ }
+ }
+ int diversity = 0;
+ for(int i=0;i< sz;i++)
+ if (matrix[i]) diversity++;
+
+ free((void *) matrix);
+
+ return diversity;
+}
+
+//
+// Add the diversity of the strings at position 'pos'
+static void
+addDiversity(KSycocaDictStringList *d, int pos)
+{
+ if (pos == 0) return;
+ if (pos < 0)
+ {
+ pos = -pos-1;
+ for(string_entry *entry=d->first(); entry; entry = d->next())
+ {
+ register int l = entry->length;
+ if (pos < l)
+ entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff;
+ }
+ }
+ else
+ {
+ pos = pos - 1;
+ for(string_entry *entry=d->first(); entry; entry = d->next())
+ {
+ if (pos < entry->length)
+ entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
+ }
+ }
+}
+
+
+void
+KSycocaDict::save(TQDataStream &str)
+{
+ if (count() == 0)
+ {
+ mHashTableSize = 0;
+ mHashList.clear();
+ str << mHashTableSize;
+ str << mHashList;
+ return;
+ }
+
+ mOffset = str.device()->at();
+
+ //kdDebug(7011) << TQString("KSycocaDict: %1 entries.").arg(count()) << endl;
+
+ //kdDebug(7011) << "Calculating hash keys.." << endl;
+
+ int maxLength = 0;
+ //kdDebug(7011) << "Finding maximum string length" << endl;
+ for(string_entry *entry=d->first(); entry; entry = d->next())
+ {
+ entry->hash = 0;
+ if (entry->length > maxLength)
+ maxLength = entry->length;
+ }
+
+ //kdDebug(7011) << TQString("Max string length = %1").arg(maxLength) << endl;
+
+ // use "almost prime" number for sz (to calculate diversity) and later
+ // for the table size of big tables
+ // int sz = d->count()*5-1;
+ register unsigned int sz = count()*4 + 1;
+ while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13)))) sz+=2;
+
+ int maxDiv = 0;
+ int maxPos = 0;
+ int lastDiv = 0;
+
+ mHashList.clear();
+
+ // try to limit diversity scan by "predicting" positions
+ // with high diversity
+ int *oldvec=new int[maxLength*2+1];
+ for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0;
+ int mindiv=0;
+
+ while(true)
+ {
+ int divsum=0,divnum=0;
+
+ maxDiv = 0;
+ maxPos = 0;
+ for(int pos=-maxLength; pos <= maxLength; pos++)
+ {
+ // cut off
+ if (oldvec[pos+maxLength]<mindiv)
+ { oldvec[pos+maxLength]=0; continue; }
+
+ int diversity = calcDiversity(d, pos, sz);
+ if (diversity > maxDiv)
+ {
+ maxDiv = diversity;
+ maxPos = pos;
+ }
+ oldvec[pos+maxLength]=diversity;
+ divsum+=diversity; divnum++;
+ }
+ // arbitrary cut-off value 3/4 of average seems to work
+ if (divnum)
+ mindiv=(3*divsum)/(4*divnum);
+
+ if (maxDiv <= lastDiv)
+ break;
+ // tqWarning("Max Div = %d at pos %d", maxDiv, maxPos);
+ lastDiv = maxDiv;
+ addDiversity(d, maxPos);
+ mHashList.append(maxPos);
+ }
+
+ delete [] oldvec;
+
+ for(string_entry *entry=d->first(); entry; entry = d->next())
+ {
+ entry->hash = hashKey(entry->keyStr);
+ }
+// fprintf(stderr, "Calculating minimum table size..\n");
+
+ mHashTableSize = sz;
+
+ struct hashtable_entry {
+ string_entry *entry;
+ TQPtrList<string_entry> *duplicates;
+ int duplicate_offset;
+ };
+
+ hashtable_entry *hashTable = new hashtable_entry[ sz ];
+
+ //kdDebug(7011) << "Clearing hashtable..." << endl;
+ for (unsigned int i=0; i < sz; i++)
+ {
+ hashTable[i].entry = 0;
+ hashTable[i].duplicates = 0;
+ }
+
+ //kdDebug(7011) << "Filling hashtable..." << endl;
+ for(string_entry *entry=d->first(); entry; entry = d->next())
+ {
+ int hash = entry->hash % sz;
+ if (!hashTable[hash].entry)
+ { // First entry
+ hashTable[hash].entry = entry;
+ }
+ else
+ {
+ if (!hashTable[hash].duplicates)
+ { // Second entry, build duplicate list.
+ hashTable[hash].duplicates = new TQPtrList<string_entry>();
+ hashTable[hash].duplicates->append(hashTable[hash].entry);
+ hashTable[hash].duplicate_offset = 0;
+ }
+ hashTable[hash].duplicates->append(entry);
+ }
+ }
+
+ str << mHashTableSize;
+ str << mHashList;
+
+ mOffset = str.device()->at(); // mOffset points to start of hashTable
+ //kdDebug(7011) << TQString("Start of Hash Table, offset = %1").arg(mOffset,8,16) << endl;
+
+ for(int pass = 1; pass <= 2; pass++)
+ {
+ str.device()->at(mOffset);
+ //kdDebug(7011) << TQString("Writing hash table (pass #%1)").arg(pass) << endl;
+ for(uint i=0; i < mHashTableSize; i++)
+ {
+ TQ_INT32 tmpid;
+ if (!hashTable[i].entry)
+ tmpid = (TQ_INT32) 0;
+ else if (!hashTable[i].duplicates)
+ tmpid = (TQ_INT32) hashTable[i].entry->payload->offset(); // Positive ID
+ else
+ tmpid = (TQ_INT32) -hashTable[i].duplicate_offset; // Negative ID
+ str << tmpid;
+ //kdDebug(7011) << TQString("Hash table : %1").arg(tmpid,8,16) << endl;
+ }
+ //kdDebug(7011) << TQString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16) << endl;
+
+ //kdDebug(7011) << TQString("Writing duplicate lists (pass #%1)").arg(pass) << endl;
+ for(uint i=0; i < mHashTableSize; i++)
+ {
+ if (hashTable[i].duplicates)
+ {
+ TQPtrList<string_entry> *dups = hashTable[i].duplicates;
+ hashTable[i].duplicate_offset = str.device()->at();
+
+ /*kdDebug(7011) << TQString("Duplicate lists: Offset = %1 list_size = %2") .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count()) << endl;
+*/
+ for(string_entry *dup = dups->first(); dup; dup=dups->next())
+ {
+ str << (TQ_INT32) dup->payload->offset(); // Positive ID
+ str << dup->keyStr; // Key (TQString)
+ }
+ str << (TQ_INT32) 0; // End of list marker (0)
+ }
+ }
+ //kdDebug(7011) << TQString("End of Dict, offset = %1").arg(str.device()->at(),8,16) << endl;
+ }
+
+ //kdDebug(7011) << "Cleaning up hash table." << endl;
+ for(uint i=0; i < mHashTableSize; i++)
+ {
+ delete hashTable[i].duplicates;
+ }
+ delete [] hashTable;
+}
+