ksycocadict.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "ksycocadict.h"
00020 #include "ksycocaentry.h"
00021 #include "ksycoca.h"
00022
00023 #include <qptrlist.h>
00024 #include <qvaluelist.h>
00025 #include <kdebug.h>
00026 #include <stdlib.h>
00027
00028 struct string_entry {
00029 string_entry(QString _key, KSycocaEntry *_payload)
00030 { keyStr = _key; key = keyStr.unicode(); length = keyStr.length(); payload = _payload; hash = 0; }
00031 uint hash;
00032 int length;
00033 const QChar *key;
00034 QString keyStr;
00035 KSycocaEntry *payload;
00036 };
00037
00038 template class QPtrList<string_entry>;
00039
00040 class KSycocaDictStringList : public QPtrList<string_entry>
00041 {
00042 public:
00043 KSycocaDictStringList();
00044 };
00045
00046 KSycocaDictStringList::KSycocaDictStringList()
00047 {
00048 setAutoDelete(true);
00049 }
00050
00051 KSycocaDict::KSycocaDict()
00052 : d(0), mStr(0), mOffset(0)
00053 {
00054 }
00055
00056 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
00057 : d(0), mStr(str), mOffset(offset)
00058 {
00059 Q_UINT32 test1, test2;
00060 str->device()->at(offset);
00061 (*str) >> test1 >> test2;
00062 if ((test1 > 0x000fffff) || (test2 > 1024))
00063 {
00064 KSycoca::flagError();
00065 mHashTableSize = 0;
00066 mOffset = 0;
00067 return;
00068 }
00069
00070 str->device()->at(offset);
00071 (*str) >> mHashTableSize;
00072 (*str) >> mHashList;
00073 mOffset = str->device()->at();
00074 }
00075
00076 KSycocaDict::~KSycocaDict()
00077 {
00078 delete d;
00079 }
00080
00081 void
00082 KSycocaDict::add(const QString &key, KSycocaEntry *payload)
00083 {
00084 if (key.isEmpty()) return;
00085 if (!payload) return;
00086 if (!d)
00087 {
00088 d = new KSycocaDictStringList();
00089 }
00090
00091 string_entry *entry= new string_entry(key, payload);
00092 d->append(entry);
00093 }
00094
00095 int
00096 KSycocaDict::find_string(const QString &key )
00097 {
00098
00099
00100 if (!mStr || !mOffset)
00101 {
00102 kdError(7011) << "No database available!" << endl;
00103 return 0;
00104 }
00105
00106 if (mHashTableSize == 0)
00107 return 0;
00108
00109
00110 uint hash = hashKey(key) % mHashTableSize;
00111
00112
00113 uint off = mOffset+sizeof(Q_INT32)*hash;
00114
00115 mStr->device()->at( off );
00116
00117 Q_INT32 offset;
00118 (*mStr) >> offset;
00119
00120
00121 if (offset == 0)
00122 return 0;
00123
00124 if (offset > 0)
00125 return offset;
00126
00127
00128 offset = -offset;
00129
00130 mStr->device()->at(offset);
00131
00132
00133 while(true)
00134 {
00135 (*mStr) >> offset;
00136 if (offset == 0) break;
00137 QString dupkey;
00138 (*mStr) >> dupkey;
00139
00140 if (dupkey == key) return offset;
00141 }
00142
00143
00144 return 0;
00145 }
00146
00147 uint
00148 KSycocaDict::count()
00149 {
00150 if (!d) return 0;
00151
00152 return d->count();
00153 }
00154
00155 void
00156 KSycocaDict::clear()
00157 {
00158 delete d;
00159 d = 0;
00160 }
00161
00162 uint
00163 KSycocaDict::hashKey( const QString &key)
00164 {
00165 int l = key.length();
00166 register uint h = 0;
00167
00168 for(uint i = 0; i < mHashList.count(); i++)
00169 {
00170 int pos = mHashList[i];
00171 if (pos < 0)
00172 {
00173 pos = -pos-1;
00174 if (pos < l)
00175 h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff;
00176 }
00177 else
00178 {
00179 pos = pos-1;
00180 if (pos < l)
00181 h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
00182 }
00183 }
00184 return h;
00185 }
00186
00187
00188
00189 int
00190 calcDiversity(KSycocaDictStringList *d, int pos, int sz)
00191 {
00192 if (pos == 0) return 0;
00193 bool *matrix = (bool *) calloc(sz, sizeof(bool));
00194 uint usz = sz;
00195
00196 if (pos < 0)
00197 {
00198 pos = -pos-1;
00199 for(string_entry *entry=d->first(); entry; entry = d->next())
00200 {
00201 register int l = entry->length;
00202 if (pos < l && pos != 0)
00203 {
00204 register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff;
00205 matrix[ hash % usz ] = true;
00206 }
00207 }
00208 }
00209 else
00210 {
00211 pos = pos-1;
00212 for(string_entry *entry=d->first(); entry; entry = d->next())
00213 {
00214 if (pos < entry->length)
00215 {
00216 register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
00217 matrix[ hash % usz ] = true;
00218 }
00219 }
00220 }
00221 int diversity = 0;
00222 for(int i=0;i< sz;i++)
00223 if (matrix[i]) diversity++;
00224
00225 free((void *) matrix);
00226
00227 return diversity;
00228 }
00229
00230
00231
00232 void
00233 addDiversity(KSycocaDictStringList *d, int pos)
00234 {
00235 if (pos == 0) return;
00236 if (pos < 0)
00237 {
00238 pos = -pos-1;
00239 for(string_entry *entry=d->first(); entry; entry = d->next())
00240 {
00241 register int l = entry->length;
00242 if (pos < l)
00243 entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff;
00244 }
00245 }
00246 else
00247 {
00248 pos = pos - 1;
00249 for(string_entry *entry=d->first(); entry; entry = d->next())
00250 {
00251 if (pos < entry->length)
00252 entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
00253 }
00254 }
00255 }
00256
00257
00258 void
00259 KSycocaDict::save(QDataStream &str)
00260 {
00261 if (count() == 0)
00262 {
00263 mHashTableSize = 0;
00264 mHashList.clear();
00265 str << mHashTableSize;
00266 str << mHashList;
00267 return;
00268 }
00269
00270 mOffset = str.device()->at();
00271
00272
00273
00274
00275
00276 int maxLength = 0;
00277
00278 for(string_entry *entry=d->first(); entry; entry = d->next())
00279 {
00280 entry->hash = 0;
00281 if (entry->length > maxLength)
00282 maxLength = entry->length;
00283 }
00284
00285
00286
00287
00288
00289
00290 register unsigned int sz = count()*4 + 1;
00291 while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13)))) sz+=2;
00292
00293 int maxDiv = 0;
00294 int maxPos = 0;
00295 int lastDiv = 0;
00296
00297 mHashList.clear();
00298
00299
00300
00301 int *oldvec=new int[maxLength*2+1];
00302 for (int i=0; i<(maxLength*2+1); i++) oldvec[i]=0;
00303 int mindiv=0;
00304
00305 while(true)
00306 {
00307 int divsum=0,divnum=0;
00308
00309 maxDiv = 0;
00310 maxPos = 0;
00311 for(int pos=-maxLength; pos <= maxLength; pos++)
00312 {
00313
00314 if (oldvec[pos+maxLength]<mindiv)
00315 { oldvec[pos+maxLength]=0; continue; }
00316
00317 int diversity = calcDiversity(d, pos, sz);
00318 if (diversity > maxDiv)
00319 {
00320 maxDiv = diversity;
00321 maxPos = pos;
00322 }
00323 oldvec[pos+maxLength]=diversity;
00324 divsum+=diversity; divnum++;
00325 }
00326
00327 if (divnum)
00328 mindiv=(3*divsum)/(4*divnum);
00329
00330 if (maxDiv <= lastDiv)
00331 break;
00332
00333 lastDiv = maxDiv;
00334 addDiversity(d, maxPos);
00335 mHashList.append(maxPos);
00336 }
00337
00338 delete [] oldvec;
00339
00340 for(string_entry *entry=d->first(); entry; entry = d->next())
00341 {
00342 entry->hash = hashKey(entry->keyStr);
00343 }
00344
00345
00346 mHashTableSize = sz;
00347
00348 struct hashtable_entry {
00349 string_entry *entry;
00350 QPtrList<string_entry> *duplicates;
00351 int duplicate_offset;
00352 };
00353
00354 hashtable_entry *hashTable = new hashtable_entry[ sz ];
00355
00356
00357 for (unsigned int i=0; i < sz; i++)
00358 {
00359 hashTable[i].entry = 0;
00360 hashTable[i].duplicates = 0;
00361 }
00362
00363
00364 for(string_entry *entry=d->first(); entry; entry = d->next())
00365 {
00366 int hash = entry->hash % sz;
00367 if (!hashTable[hash].entry)
00368 {
00369 hashTable[hash].entry = entry;
00370 }
00371 else
00372 {
00373 if (!hashTable[hash].duplicates)
00374 {
00375 hashTable[hash].duplicates = new QPtrList<string_entry>();
00376 hashTable[hash].duplicates->append(hashTable[hash].entry);
00377 hashTable[hash].duplicate_offset = 0;
00378 }
00379 hashTable[hash].duplicates->append(entry);
00380 }
00381 }
00382
00383 str << mHashTableSize;
00384 str << mHashList;
00385
00386 mOffset = str.device()->at();
00387
00388
00389 for(int pass = 1; pass <= 2; pass++)
00390 {
00391 str.device()->at(mOffset);
00392
00393 for(uint i=0; i < mHashTableSize; i++)
00394 {
00395 Q_INT32 tmpid;
00396 if (!hashTable[i].entry)
00397 tmpid = (Q_INT32) 0;
00398 else if (!hashTable[i].duplicates)
00399 tmpid = (Q_INT32) hashTable[i].entry->payload->offset();
00400 else
00401 tmpid = (Q_INT32) -hashTable[i].duplicate_offset;
00402 str << tmpid;
00403
00404 }
00405
00406
00407
00408 for(uint i=0; i < mHashTableSize; i++)
00409 {
00410 if (hashTable[i].duplicates)
00411 {
00412 QPtrList<string_entry> *dups = hashTable[i].duplicates;
00413 hashTable[i].duplicate_offset = str.device()->at();
00414
00415
00416
00417 for(string_entry *dup = dups->first(); dup; dup=dups->next())
00418 {
00419 str << (Q_INT32) dup->payload->offset();
00420 str << dup->keyStr;
00421 }
00422 str << (Q_INT32) 0;
00423 }
00424 }
00425
00426 }
00427
00428
00429 for(uint i=0; i < mHashTableSize; i++)
00430 {
00431 delete hashTable[i].duplicates;
00432 }
00433 delete [] hashTable;
00434 }
00435
This file is part of the documentation for kdelibs Version 3.1.4.