The Pedigree Project  0.1
HashTable.h
1 /*
2  * Copyright (c) 2008-2014, Pedigree Developers
3  *
4  * Please see the CONTRIB file in the root of the source tree for a full
5  * list of contributors.
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #ifndef KERNEL_UTILITIES_HASHTABLE_H
21 #define KERNEL_UTILITIES_HASHTABLE_H
22 
23 #include "pedigree/kernel/processor/types.h"
24 #include "pedigree/kernel/utilities/Iterator.h"
25 #include "pedigree/kernel/utilities/Pair.h"
26 #include "pedigree/kernel/utilities/Result.h"
27 #include "pedigree/kernel/utilities/utility.h"
28 
32 namespace HashTableError
33 {
34 enum Error
35 {
36  HashTableEmpty,
37  NotFound,
38  IterationComplete
39 };
40 }
41 
63 template <
64  class K, class V, class SiblingK = K, size_t InitialBuckets = 4,
65  bool QuadraticProbe = true, size_t GrowthFactor = 2>
66 class HashTable
67 {
68  private:
69  typedef HashTable<
70  K, V, SiblingK, InitialBuckets, QuadraticProbe, GrowthFactor>
71  SelfType;
72 
73  static_assert(
74  InitialBuckets > 0, "At least one initial bucket must be available.");
75 
76  struct bucket
77  {
78  bucket() : key(), value(), set(false), parent(nullptr)
79  {
80  }
81 
82  K key;
83  V value;
84  bool set;
85  HashTable *parent;
86 
87  struct bucket *next()
88  {
89  if (!parent)
90  {
91  return nullptr;
92  }
93 
94  struct bucket *node = this;
95  while (++node < (parent->m_Buckets + parent->m_nBuckets))
96  {
97  if (node->set)
98  {
99  return node;
100  }
101  }
102 
103  return nullptr;
104  }
105 
106  struct bucket *previous()
107  {
108  if (!parent)
109  {
110  return nullptr;
111  }
112 
113  struct bucket *node = this;
114  while (--node >= parent->m_Buckets)
115  {
116  if (node->set)
117  {
118  return node;
119  }
120  }
121 
122  return nullptr;
123  }
124  };
125 
126  public:
127  typedef ::Iterator<V, struct bucket> Iterator;
128  typedef typename Iterator::Const ConstIterator;
129 
131  typedef Result<Pair<K, V>, HashTableError::Error> PairLookupResult;
132 
133  HashTable()
134  : m_Buckets(nullptr), m_Default(), m_nBuckets(0), m_nItems(0),
135  m_nMask(0)
136  {
137  }
138 
142  HashTable(const V &customDefault) : HashTable()
143  {
144  m_Default = customDefault;
145  }
146 
150  void clear()
151  {
152  delete[] m_Buckets;
153  m_Buckets = nullptr;
154  m_nItems = 0;
155  }
156 
157  ~HashTable()
158  {
159  clear();
160  }
161 
165  bool contains(const K &k) const
166  {
167  if ((!m_Buckets) || (!m_nItems))
168  {
169  return false;
170  }
171 
172  const uint32_t khash = k.hash();
173  size_t hash = khash & m_nMask;
174 
175  const bucket *b = &m_Buckets[hash];
176  if (!b->set)
177  {
178  return false;
179  }
180 
181  if (b->key != k)
182  {
183  b = findNextSet(hash, k, khash);
184  if (!b)
185  {
186  return false;
187  }
188  }
189 
190  return true;
191  }
192 
200  LookupResult lookup(const K &k) const
201  {
202  HashTableError::Error err;
203  const struct bucket *b = doLookup<K>(k, err);
204  if (b)
205  {
206  return LookupResult::withValue(b->value);
207  }
208  else
209  {
210  return LookupResult::withError(err);
211  }
212  }
213 
214  template <typename SK = SiblingK>
215  typename pedigree_std::enable_if<
216  !pedigree_std::is_same<K, SK>::value, LookupResult>::type
217  lookup(const SK &k) const
218  {
219  HashTableError::Error err;
220  const struct bucket *b = doLookup<SK>(k, err);
221  if (b)
222  {
223  return LookupResult::withValue(b->value);
224  }
225  else
226  {
227  return LookupResult::withError(err);
228  }
229  }
230 
238  PairLookupResult getNth(size_t n) const
239  {
240  if (n < count())
241  {
242  size_t j = 0;
243  for (size_t i = 0; i < m_nBuckets; ++i)
244  {
245  if (!m_Buckets[i].set)
246  {
247  continue;
248  }
249 
250  if (j++ == n)
251  {
252  Pair<K, V> result(m_Buckets[i].key, m_Buckets[i].value);
253  return PairLookupResult::withValue(result);
254  }
255  }
256  }
257 
258  return PairLookupResult::withError(HashTableError::IterationComplete);
259  }
260 
264  bool insert(const K &k, const V &v)
265  {
266  check();
267 
268  // Ensure we have space for the new item
269  reserve(m_nItems + 1);
270 
271  const uint32_t khash = k.hash();
272  size_t hash = khash & m_nMask;
273 
274  // Do we need to chain?
275  bucket *b = &m_Buckets[hash];
276  if (b->set)
277  {
278  // If key matches, this is more than just a hash collision.
279  if (b->key == k)
280  {
281  return false;
282  }
283 
284  // Probe for an empty bucket.
285  b = findNextEmpty(hash);
286  if (!b)
287  {
288  return false;
289  }
290  }
291 
292  b->set = true;
293  b->key = k;
294  b->key.hash(); // precompute hash
295  b->value = v;
296  bool wasEmpty = m_nItems == 0;
297  ++m_nItems;
298 
299  return true;
300  }
301 
305  bool update(const K &k, const V &v)
306  {
307  check();
308 
309  HashTableError::Error err;
310  struct bucket *b = const_cast<struct bucket *>(doLookup(k, err));
311  if (!b)
312  {
313  return false;
314  }
315 
316  b->value = v;
317  return true;
318  }
319 
323  void remove(const K &k)
324  {
325  if (!m_Buckets)
326  {
327  return;
328  }
329 
330  const uint32_t khash = k.hash();
331  size_t hash = khash & m_nMask;
332 
333  bucket *b = &m_Buckets[hash];
334  if (!b->set)
335  {
336  return;
337  }
338 
339  bool didRemove = false;
340 
341  if (b->key == k)
342  {
343  b->set = false;
344  b->value = m_Default;
345  didRemove = true;
346  }
347  else
348  {
349  b = findNextSet(hash, k, khash);
350  if (b)
351  {
352  b->set = false;
353  b->value = m_Default;
354  didRemove = true;
355  }
356  }
357 
358  if (didRemove)
359  {
360  --m_nItems;
361 
362  if (m_nItems)
363  {
364  // Must rehash as we use linear probing for collision handling.
365  rehash();
366  }
367  }
368  }
369 
373  void reserve(size_t numItems)
374  {
375  check();
376 
377  if (numItems < m_nBuckets)
378  {
379  // No resize necessary, reserve is completely contained within the
380  // current bucket array.
381  return;
382  }
383 
384  if (!numItems)
385  {
386  ++numItems;
387  }
388 
389  // Round up to next power of two (if not already)
390  size_t nextpow2 = 1ULL << (64 - __builtin_clzll(numItems));
391  size_t numItemsRounded = max(InitialBuckets, nextpow2);
392  if (m_nBuckets == numItemsRounded)
393  {
394  // no need to resize here
395  return;
396  }
397 
398  // Handle resize and associated rehash if the table is full.
399  size_t origCount = m_nBuckets;
400  m_nBuckets = numItemsRounded;
401  m_nMask = m_nBuckets - 1;
402  if (m_nItems)
403  {
404  rehash(origCount);
405  }
406  else
407  {
408  // No items in the array, just recreate it here
409  delete[] m_Buckets;
410  m_Buckets = new bucket[m_nBuckets];
411  setDefaults(true);
412  }
413  }
414 
415  size_t count() const
416  {
417  return m_nItems;
418  }
419 
420  Iterator begin()
421  {
422  return m_nItems ? Iterator(getFirstSetBucket()) : end();
423  }
424 
425  ConstIterator begin() const
426  {
427  return m_nItems ? ConstIterator(getFirstSetBucket()) : end();
428  }
429 
430  Iterator end()
431  {
432  return Iterator(0);
433  }
434 
435  ConstIterator end() const
436  {
437  return ConstIterator(0);
438  }
439 
443  Iterator erase(Iterator &at)
444  {
445  struct bucket *node = at.__getNode();
446  if (node && node->set)
447  {
448  node->set = false;
449  --m_nItems;
450  rehash();
451 
452  // This is the only safe way to continue iterating - the rehash in
453  // remove makes everything else incorrect.
454  return begin();
455  }
456  else
457  {
458  return at;
459  }
460  }
461 
468  HashTable(const SelfType &other) = delete;
469  SelfType &operator=(const SelfType &p) = delete;
470 
477  void copyFrom(const SelfType &other)
478  {
479  clear();
480 
481  m_Default = other.m_Default;
482  m_nBuckets = other.m_nBuckets;
483  m_nItems = other.m_nItems;
484  m_nMask = other.m_nMask;
485  m_Buckets = new bucket[m_nBuckets];
486  pedigree_std::copy(m_Buckets, other.m_Buckets, m_nBuckets);
487  resetParents();
488  }
489 
490  SelfType &operator=(SelfType &&p)
491  {
492  clear();
493 
494  m_Default = pedigree_std::move(p.m_Default);
495  m_nBuckets = pedigree_std::move(p.m_nBuckets);
496  m_nItems = pedigree_std::move(p.m_nItems);
497  m_nMask = pedigree_std::move(p.m_nMask);
498  m_Buckets = pedigree_std::move(p.m_Buckets);
499 
500  p.m_Buckets = nullptr;
501  p.clear();
502 
503  return *this;
504  }
505 
506  private:
507  struct bucket *getFirstSetBucket() const
508  {
509  struct bucket *result = m_Buckets;
510  while (result < (m_Buckets + m_nBuckets))
511  {
512  if (result->set)
513  {
514  return result;
515  }
516 
517  ++result;
518  }
519 
520  return nullptr;
521  }
522 
523  void check()
524  {
525  if (m_Buckets == nullptr)
526  {
527  m_Buckets = new bucket[InitialBuckets];
528  m_nBuckets = InitialBuckets;
529  m_nMask = InitialBuckets - 1;
530  setDefaults(true);
531  }
532  }
533 
534  void rehash(size_t oldCount = 0)
535  {
536  if (oldCount == 0)
537  {
538  oldCount = m_nBuckets;
539  }
540 
541  bucket *oldBuckets = m_Buckets;
542  m_Buckets = new bucket[m_nBuckets];
543  setDefaults(true);
544 
545  if (m_nItems)
546  {
547  // Performing a new insert, clear out the number of items as
548  // insert() will increment otherwise.
549  m_nItems = 0;
550 
551  for (size_t i = 0; i < oldCount; ++i)
552  {
553  if (oldBuckets[i].set)
554  {
555  insert(oldBuckets[i].key, oldBuckets[i].value);
556  }
557  }
558  }
559  delete[] oldBuckets;
560  }
561 
562  size_t nextIndex(size_t i, size_t &index, size_t &step) const
563  {
564  if (QuadraticProbe)
565  {
566  index = (index + step) & m_nMask;
567  ++step;
568  }
569  else
570  {
571  index = i;
572  }
573 
574  return index;
575  }
576 
577  bucket *findNextEmpty(size_t currentHash)
578  {
579  size_t index = 0;
580  size_t step = 1;
581  for (size_t i = 0; i < m_nBuckets; ++i)
582  {
583  size_t nextHash =
584  (currentHash + nextIndex(i, index, step)) & m_nMask;
585  bucket *b = &m_Buckets[nextHash];
586  if (!b->set)
587  {
588  return b;
589  }
590  }
591 
592  return nullptr;
593  }
594 
595  bucket *findNextSet(size_t currentHash, const K &k, uint32_t khash=0)
596  {
597  size_t index = 0;
598  size_t step = 1;
599  if (khash == 0)
600  {
601  khash = k.hash();
602  }
603  for (size_t i = 0; i < m_nBuckets; ++i)
604  {
605  size_t nextHash =
606  (currentHash + nextIndex(i, index, step)) & m_nMask;
607  bucket *b = &m_Buckets[nextHash];
608 
609  // Hash comparison is likely to be faster than raw object
610  // comparison so we save the latter for when we have a candidate.
611  if (b->set && (b->key.hash() == khash))
612  {
613  if (b->key == k)
614  {
615  return b;
616  }
617  }
618  }
619 
620  return nullptr;
621  }
622 
623  template <class FindK = K>
624  const bucket *findNextSet(size_t currentHash, const FindK &k, uint32_t khash=0) const
625  {
626  size_t index = 0;
627  size_t step = 1;
628  if (khash == 0)
629  {
630  khash = k.hash();
631  }
632  for (size_t i = 0; i < m_nBuckets; ++i)
633  {
634  size_t nextHash =
635  (currentHash + nextIndex(i, index, step)) & m_nMask;
636  const bucket *b = &m_Buckets[nextHash];
637 
638  // Hash comparison is likely to be faster than raw object
639  // comparison so we save the latter for when we have a candidate.
640  if (b->set && (b->key.hash() == khash))
641  {
642  if (b->key == k)
643  {
644  return b;
645  }
646  }
647  }
648 
649  return nullptr;
650  }
651 
652  template <class LookupK>
653  const struct bucket *
654  doLookup(const LookupK &k, HashTableError::Error &err) const
655  {
656  if ((!m_Buckets) || (!m_nItems))
657  {
658  err = HashTableError::HashTableEmpty;
659  return nullptr;
660  }
661 
662  const uint32_t khash = k.hash();
663  size_t hash = khash & m_nMask;
664 
665  const bucket *b = &m_Buckets[hash];
666  if (!b->set)
667  {
668  err = HashTableError::NotFound;
669  return nullptr;
670  }
671 
672  if (b->key != k)
673  {
674  b = findNextSet(hash, k, khash);
675  if (!b)
676  {
677  err = HashTableError::NotFound;
678  return nullptr;
679  }
680  }
681 
682  return b;
683  }
684 
686  void setDefaults(bool reparentToo=false)
687  {
688  for (size_t i = 0; i < m_nBuckets; ++i)
689  {
690  if (reparentToo)
691  {
692  m_Buckets[i].parent = this;
693  }
694 
695  if (m_Buckets[i].set)
696  {
697  continue;
698  }
699 
700  m_Buckets[i].value = m_Default;
701  }
702  }
703 
704  void resetParents()
705  {
706  for (size_t i = 0; i < m_nBuckets; ++i)
707  {
708  m_Buckets[i].parent = this;
709  }
710  }
711 
712  bucket *m_Buckets;
713  V m_Default;
714  size_t m_nBuckets;
715  size_t m_nItems;
716  size_t m_nMask;
717 };
718 
721 #endif
Definition: Pair.h:30
Struct * __getNode()
Definition: Iterator.h:151
bool contains(const K &k) const
Definition: HashTable.h:165
Definition: Result.h:36
bool update(const K &k, const V &v)
Definition: HashTable.h:305
void clear()
Definition: HashTable.h:150
Iterator erase(Iterator &at)
Definition: HashTable.h:443
void setDefaults(bool reparentToo=false)
Definition: HashTable.h:686
HashTable(const V &customDefault)
Definition: HashTable.h:142
void reserve(size_t numItems)
Definition: HashTable.h:373
bool insert(const K &k, const V &v)
Definition: HashTable.h:264
An iterator applicable for many data structures.
Definition: Iterator.h:43
LookupResult lookup(const K &k) const
Definition: HashTable.h:200
PairLookupResult getNth(size_t n) const
Definition: HashTable.h:238
void copyFrom(const SelfType &other)
Definition: HashTable.h:477
Definition: errors.h:24