The Pedigree Project  0.1
RadixTree.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2008-2014, Pedigree Developers
3  *
4  * Please see the CONTRIB file in the root of the source tree for a full
5  * list of contributors.
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #ifndef KERNEL_UTILITIES_RADIX_TREE_H
21 #define KERNEL_UTILITIES_RADIX_TREE_H
22 
23 #include "pedigree/kernel/Log.h"
24 #include "pedigree/kernel/compiler.h"
25 #include "pedigree/kernel/processor/types.h"
26 #include "pedigree/kernel/utilities/Iterator.h"
27 #include "pedigree/kernel/utilities/ObjectPool.h"
28 #include "pedigree/kernel/utilities/Result.h"
29 #include "pedigree/kernel/utilities/String.h"
30 #include "pedigree/kernel/utilities/Vector.h"
31 #include "pedigree/kernel/utilities/utility.h"
32 
44 template <class T>
46 {
47  private:
49  class Node
50  {
51  public:
53  enum MatchType
54  {
58  OverMatch
59  };
60 
61  Node(bool bCaseSensitive)
62  : m_Key(), value(T()), m_Children(), m_pParent(0),
63  m_bCaseSensitive(bCaseSensitive), m_pParentTree(0),
64  m_bHasValue(false)
65  {
66  }
67 
68  ~Node();
69 
70  // Clears all known children and returns them to the parent ObjectPool
71  void returnAllChildren();
72 
76  {
77  return doNext();
78  }
83  {
84  return 0;
85  }
86 
89  Node *findChild(const char *cpKey) const;
90 
92  void addChild(Node *pNode);
93 
95  void replaceChild(Node *pNodeOld, Node *pNodeNew);
96 
98  void removeChild(Node *pChild);
99 
103  MatchType matchKey(const char *cpKey, size_t &offset) const;
104 
106  Node *getFirstChild() const;
107 
111  void prependKey(const char *cpKey);
112 
113  void setKey(const char *cpKey);
115  void setKey(const char *cpKey, size_t lengthHint);
116  inline const char *getKey() const
117  {
118  return m_Key;
119  }
120  inline void setValue(const T &pV)
121  {
122  value = pV;
123  m_bHasValue = true;
124  }
125  inline void removeValue()
126  {
127  value = T();
128  m_bHasValue = false;
129  }
130  inline const T &getValue() const
131  {
132  return value;
133  }
134  inline void setParent(Node *pP)
135  {
136  m_pParent = pP;
137  }
138  inline Node *getParent() const
139  {
140  return m_pParent;
141  }
142  inline bool hasValue() const
143  {
144  return m_bHasValue;
145  }
146 
147  void dump(void (*emit_line)(const char *s)) const;
148 
154  T value;
156  childlist_t m_Children;
160  const bool m_bCaseSensitive;
161 
164 
170 
171  private:
172  Node(const Node &);
173  Node &operator=(const Node &);
174 
176  Node *doNext() const;
177 
180  Node *getNextSibling() const;
181  };
182 
183  public:
185  typedef ::Iterator<T, Node> Iterator;
187  typedef typename Iterator::Const ConstIterator;
188  typedef Result<T, bool> LookupType;
189 
191  RadixTree();
194  RadixTree(const RadixTree<T> &x);
196  RadixTree(bool bCaseSensitive);
198  ~RadixTree();
199 
202  RadixTree &operator=(const RadixTree &x);
203 
206  size_t count() const;
210  void insert(const String &key, const T &value);
214  Result<T, bool> lookup(const String &key) const;
216  void remove(const String &key);
217 
219  void clear();
220 
222  Iterator erase(Iterator iter)
223  {
224  Node *iterNode = iter.__getNode();
225  Node *next = iterNode->next();
226  remove(String(iterNode->getKey()));
227  Iterator ret(next);
228  return ret;
229  }
230 
233  inline Iterator begin()
234  {
235  if (!m_pRoot)
236  return Iterator(0);
237  Iterator it(m_pRoot->next());
238  return it;
239  }
242  inline ConstIterator begin() const
243  {
244  if (!m_pRoot)
245  return ConstIterator(0);
246  ConstIterator it(m_pRoot->next());
247  return it;
248  }
251  inline Iterator end()
252  {
253  return Iterator(0);
254  }
257  inline ConstIterator end() const
258  {
259  return ConstIterator(0);
260  }
261 
263  void dump(void (*emit_line)(const char *s)) const;
264 
265  private:
267  Node *cloneNode(Node *node, Node *parent);
269  Node *getNewNode()
270  {
271  Node *p = m_NodePool.allocate(m_bCaseSensitive);
272  p->m_pParentTree = this;
273  return p;
274  }
276  void returnNode(Node *p)
277  {
278  if (p)
279  {
280  // wipe out info that shouldn't go back to the pool
281  p->m_Key = String();
282  p->value = T();
283  p->m_bHasValue = false;
284  m_NodePool.deallocate(p);
285  }
286  }
287 
289  size_t m_nItems;
291  Node *m_pRoot;
293  const bool m_bCaseSensitive;
297 };
298 
299 template <class T>
301  : m_nItems(0), m_pRoot(0), m_bCaseSensitive(true), m_NodePool()
302 {
303 }
304 
305 template <class T>
306 RadixTree<T>::RadixTree(bool bCaseSensitive)
307  : m_nItems(0), m_pRoot(0), m_bCaseSensitive(bCaseSensitive), m_NodePool()
308 {
309 }
310 
311 template <class T>
313 {
314  clear();
316 }
317 
318 template <class T>
321  m_NodePool()
322 {
323  clear();
325  m_pRoot = cloneNode(x.m_pRoot, 0);
326  m_nItems = x.m_nItems;
327 }
328 
329 template <class T>
331 {
332  clear();
334  m_pRoot = cloneNode(x.m_pRoot, 0);
335  m_nItems = x.m_nItems;
337  return *this;
338 }
339 
340 template <class T>
341 size_t RadixTree<T>::count() const
342 {
343  return m_nItems;
344 }
345 
346 template <class T>
347 void RadixTree<T>::insert(const String &key, const T &value)
348 {
349  if (!m_pRoot)
350  {
351  // The root node always exists and is a lambda transition node
352  // (zero-length key). This removes the need for most special cases.
353  m_pRoot = getNewNode();
354  m_pRoot->setKey(0);
355  }
356 
357  Node *pNode = m_pRoot;
358 
359  const char *cpKey = static_cast<const char *>(key);
360  const char *cpKeyOrig = cpKey;
361  size_t cpKeyLength = key.length();
362 
363  while (true)
364  {
365  size_t partialOffset = 0;
366  switch (pNode->matchKey(cpKey, partialOffset))
367  {
368  case Node::ExactMatch:
369  {
370  if (!pNode->hasValue())
371  {
372  m_nItems++;
373  }
374 
375  pNode->setValue(value);
376  return;
377  }
378  case Node::NoMatch:
379  {
380  FATAL("RadixTree: algorithmic error!");
381  break;
382  }
383  case Node::PartialMatch:
384  {
385  // We need to create an intermediate node that contains the
386  // partial match, then adjust the key of this node.
387 
388  // Find the common key prefix.
389  size_t i = partialOffset;
390 
391  Node *pInter = getNewNode();
392 
393  // Intermediate node's key is the common prefix of both keys.
394  pInter->m_Key.assign(cpKey, partialOffset);
395 
396  // Must do this before pNode's key is changed.
397  pNode->getParent()->replaceChild(pNode, pInter);
398 
399  // pNode's new key is the uncommon postfix.
400  size_t len = pNode->m_Key.length();
401 
402  // Note: this is guaranteed to not require an allocation,
403  // because it's smaller than the current string in m_Key. We'll
404  // not overwrite because we're copying from deeper in the
405  // string. The null write will suffice.
406  pNode->m_Key.assign(
407  &pNode->getKey()[partialOffset], len - partialOffset);
408 
409  // If the uncommon postfix of the key is non-zero length, we
410  // have to create another node, a child of pInter.
411  if (cpKey[partialOffset] != 0)
412  {
413  Node *pChild = getNewNode();
414  pChild->setKey(
415  &cpKey[partialOffset],
416  (cpKeyLength - partialOffset - (cpKey - cpKeyOrig)));
417  pChild->setValue(value);
418  pChild->setParent(pInter);
419  pInter->addChild(pChild);
420  }
421  else
422  {
423  pInter->setValue(value);
424  }
425 
426  pInter->setParent(pNode->getParent());
427  pInter->addChild(pNode);
428  pNode->setParent(pInter);
429 
430  m_nItems++;
431  return;
432  }
433  case Node::OverMatch:
434  {
435  cpKey += pNode->m_Key.length();
436 
437  Node *pChild = pNode->findChild(cpKey);
438  if (pChild)
439  {
440  pNode = pChild;
441  // Iterative case.
442  break;
443  }
444  else
445  {
446  // No child - create a new one.
447  pChild = getNewNode();
448  pChild->setKey(cpKey, cpKeyLength - (cpKey - cpKeyOrig));
449  pChild->setValue(value);
450  pChild->setParent(pNode);
451  pNode->addChild(pChild);
452 
453  m_nItems++;
454  return;
455  }
456  }
457  }
458  }
459 }
460 
461 template <class T>
463 {
464  if (!m_pRoot)
465  {
466  return LookupType::withError(true);
467  }
468 
469  Node *pNode = m_pRoot;
470 
471  const char *cpKey = static_cast<const char *>(key);
472 
473  while (true)
474  {
475  size_t offset = 0;
476  switch (pNode->matchKey(cpKey, offset))
477  {
478  case Node::ExactMatch:
479  if (!pNode->hasValue())
480  {
481  // No value here, exact match on key. This can happen in
482  // cases where we needed to create a node to split a key
483  // but nothing was attached to the split node.
484  return LookupType::withError(true);
485  }
486  return LookupType::withValue(pNode->getValue());
487  case Node::NoMatch:
488  case Node::PartialMatch:
489  return LookupType::withError(true);
490  case Node::OverMatch:
491  {
492  cpKey += pNode->m_Key.length();
493 
494  Node *pChild = pNode->findChild(cpKey);
495  if (pChild)
496  {
497  pNode = pChild;
498  // Iterative case.
499  break;
500  }
501  else
502  {
503  return LookupType::withError(true);
504  }
505  }
506  }
507  }
508 }
509 
510 template <class T>
511 void RadixTree<T>::remove(const String &key)
512 {
513  if (!m_pRoot)
514  {
515  // The root node always exists and is a lambda transition node
516  // (zero-length key). This removes the need for most special cases.
517  m_pRoot = getNewNode();
518  m_pRoot->setKey(0);
519  }
520 
521  Node *pNode = m_pRoot;
522 
523  const char *cpKey = static_cast<const char *>(key);
524 
525  // Our invariant is that the root node always exists. Therefore we must
526  // special case here so it doesn't get deleted.
527  if (*cpKey == 0)
528  {
529  m_pRoot->removeValue();
530  return;
531  }
532 
533  while (true)
534  {
535  size_t offset = 0;
536  switch (pNode->matchKey(cpKey, offset))
537  {
538  case Node::ExactMatch:
539  {
540  // Delete this node. If we set the value to zero, it is
541  // effectively removed from the map. There are only certain
542  // cases in which we can delete the node completely, however.
543  pNode->removeValue();
544  m_nItems--;
545 
546  // We have the invariant that the tree is always optimised. This
547  // means that when we delete a node we only have to optimise the
548  // local branch. There are two situations that need covering:
549  // (a) No children. This means it's a leaf node and can be
550  // deleted. We then need to consider its parent,
551  // which, if its value is zero and now has zero or one
552  // children can be optimised itself.
553  // (b) One child. This is a linear progression and the child
554  // node's key can be changed to be concatenation of
555  // pNode's and its. This doesn't affect anything farther
556  // up the tree and so no recursion is needed.
557 
558  Node *pParent = 0;
559  if (pNode->m_Children.count() == 0)
560  {
561  // Leaf node, can just delete.
562  pParent = pNode->getParent();
563  pParent->removeChild(pNode);
564  returnNode(pNode);
565 
566  pNode = pParent;
567  // Optimise up the tree.
568  while (true)
569  {
570  if (pNode == m_pRoot)
571  return;
572 
573  if (pNode->m_Children.count() == 1 &&
574  !pNode->hasValue())
575  // Break out of this loop and get caught in the next
576  // if(pNode->m_nChildren == 1)
577  break;
578 
579  if (pNode->m_Children.count() == 0 &&
580  !pNode->hasValue())
581  {
582  // Leaf node, can just delete.
583  pParent = pNode->getParent();
584  pParent->removeChild(pNode);
585  returnNode(pNode);
586 
587  pNode = pParent;
588  continue;
589  }
590  return;
591  }
592  }
593 
594  if (pNode->m_Children.count() == 1)
595  {
596  // Change the child's key to be the concatenation of ours
597  // and its.
598  Node *pChild = pNode->getFirstChild();
599  pParent = pNode->getParent();
600 
601  // Must call this before delete, so pChild doesn't get
602  // deleted.
603  pNode->removeChild(pChild);
604 
605  pChild->prependKey(pNode->getKey());
606  pChild->setParent(pParent);
607  pParent->removeChild(pNode);
608  pParent->addChild(pChild);
609 
610  returnNode(pNode);
611  }
612  return;
613  }
614  case Node::NoMatch:
615  case Node::PartialMatch:
616  // Can't happen unless the key didn't actually exist.
617  return;
618  case Node::OverMatch:
619  {
620  cpKey += pNode->m_Key.length();
621 
622  Node *pChild = pNode->findChild(cpKey);
623  if (pChild)
624  {
625  pNode = pChild;
626  // Iterative case.
627  break;
628  }
629  else
630  return;
631  }
632  }
633  }
634 }
635 
636 template <class T>
638 {
639  // Deal with the easy case first.
640  if (!pNode)
641  return 0;
642 
643  Node *n = getNewNode();
644  n->setKey(pNode->m_Key);
645  n->setValue(pNode->value);
646  n->setParent(pParent);
647 
649  pNode->m_Children.begin();
650  it != pNode->m_Children.end(); ++it)
651  {
652  n->addChild(cloneNode((*it), pParent));
653  }
654 
655  return n;
656 }
657 
658 template <class T>
660 {
662  m_pRoot = getNewNode();
663  m_pRoot->setKey(0);
664  m_nItems = 0;
665 }
666 
667 template <class T>
668 void RadixTree<T>::dump(void (*emit_line)(const char *s)) const
669 {
670  if (m_pRoot)
671  {
672  m_pRoot->dump(emit_line);
673  }
674 }
675 
676 //
677 // RadixTree::Node implementation.
678 //
679 
680 template <class T>
682 {
683  returnAllChildren();
684 }
685 
686 template <class T>
688 {
689  // Returns depth-first, which ensures we're not returning any children
690  // while the ObjectPool lock is held.
691  for (auto it : m_Children)
692  {
693  it->returnAllChildren();
694  m_pParentTree->returnNode(it);
695  }
696 
697  m_Children.clear();
698 }
699 
700 template <class T>
701 typename RadixTree<T>::Node *
702 RadixTree<T>::Node::findChild(const char *cpKey) const
703 {
704  for (auto it : m_Children)
705  {
706  size_t offset = 0;
707  if (it->matchKey(cpKey, offset) != NoMatch)
708  {
709  return it;
710  }
711  }
712  return 0;
713 }
714 
715 template <class T>
717 {
718  if (pNode)
719  m_Children.pushBack(pNode);
720 }
721 
722 template <class T>
723 void RadixTree<T>::Node::replaceChild(Node *pNodeOld, Node *pNodeNew)
724 {
725  for (auto it = m_Children.begin(); it != m_Children.end(); ++it)
726  {
727  if (*it == pNodeOld)
728  {
729  *it = pNodeNew;
730  break;
731  }
732  }
733 }
734 
735 template <class T>
737 {
739  m_Children.begin();
740  it != m_Children.end();)
741  {
742  if ((*it) == pChild)
743  {
744  it = m_Children.erase(it);
745  }
746  else
747  ++it;
748  }
749 }
750 
751 template <class T>
753 RadixTree<T>::Node::matchKey(const char *cpKey, size_t &offset) const
754 {
755  // Cannot partially match (e.g. toast/toastier == PartialMatch if the
756  // cpKey is longer (e.g. toastier/toast == OverMatch)).
757  if (!m_Key.length())
758  {
759  return OverMatch;
760  }
761 
762  const char *myKey = getKey();
763 
764  // Do some quick checks early.
765  if ((m_bCaseSensitive && (cpKey[0] != myKey[0])) ||
766  ((!m_bCaseSensitive) && (toLower(cpKey[0]) != toLower(myKey[0]))))
767  {
768  // non-partial, first character didn't even match
769  return NoMatch;
770  }
771 
772  size_t i = 0;
773  int r = StringCompareCase(
774  cpKey, myKey, m_bCaseSensitive, m_Key.length() + 1, &i);
775 
776  if (r == 0)
777  {
778  // exact match, all characters & null terminators matched
779  return ExactMatch;
780  }
781  else if (m_Key[i] == 0)
782  {
783  return OverMatch;
784  }
785  else
786  {
787  // partial, both strings share a common prefix
788  offset = i;
789  return PartialMatch;
790  }
791 }
792 
793 template <class T>
794 void RadixTree<T>::Node::setKey(const char *cpKey)
795 {
796  m_Key.assign(cpKey);
797 }
798 
799 template <class T>
800 void RadixTree<T>::Node::setKey(const char *cpKey, size_t lengthHint)
801 {
802  m_Key.assign(cpKey, lengthHint);
803 }
804 
805 template <class T>
807 {
808  return *(m_Children.begin());
809 }
810 
811 template <class T>
812 void RadixTree<T>::Node::prependKey(const char *cpKey)
813 {
814  String temp = m_Key;
815  m_Key.assign(cpKey);
816  m_Key += temp;
817 }
818 
819 template <class T>
821 {
822  // pNode needs to be settable, but not what it points to!
823  Node const *pNode = this;
824  while ((pNode == this) || (pNode && (!pNode->value)))
825  {
826  Node const *tmp;
827  if (pNode->m_Children.count())
828  pNode = pNode->getFirstChild();
829  else
830  {
831  tmp = pNode;
832  pNode = 0;
833  while (tmp && tmp->m_pParent != 0 /* Root node */)
834  {
835  if ((pNode = tmp->getNextSibling()) != 0)
836  break;
837  tmp = tmp->m_pParent;
838  }
839  if (tmp->m_pParent == 0)
840  return 0;
841  }
842  }
843  return const_cast<Node *>(pNode);
844 }
845 
846 template <class T>
848 {
849  if (!m_pParent)
850  return 0;
851 
852  bool b = false;
854  m_pParent->m_Children.begin();
855  it != m_pParent->m_Children.end(); ++it)
856  {
857  if (b)
858  return (*it);
859  if ((*it) == this)
860  b = true;
861  }
862 
863  return 0;
864 }
865 
866 template <class T>
867 void RadixTree<T>::Node::dump(void (*emit_line)(const char *s)) const
868 {
869  for (auto it : m_Children)
870  {
871  // depth-first dump the next node
872  it->dump(emit_line);
873 
874  // dump this connection
875  String s;
876  s.Format(
877  " \"Node<%p: %s>\" -> \"Node<%p: %s>\";",
878  static_cast<const void *>(it), it->getKey(),
879  static_cast<const void *>(this), static_cast<const char *>(m_Key));
880  emit_line(static_cast<const char *>(s));
881  }
882 }
883 
884 // Explicitly instantiate RadixTree<void*> early.
885 extern template class RadixTree<void *>; // IWYU pragma: keep
886 
889 #endif
ConstIterator end() const
Definition: RadixTree.h:257
ConstIterator begin() const
Definition: RadixTree.h:242
Node * getNewNode()
Definition: RadixTree.h:269
MatchType matchKey(const char *cpKey, size_t &offset) const
Definition: RadixTree.h:753
Iterator begin()
Definition: Vector.h:148
Iterator end()
Definition: Vector.h:160
Node * m_pRoot
Definition: RadixTree.h:291
::Iterator< T, Node > Iterator
Definition: RadixTree.h:185
size_t count() const
Definition: Vector.h:264
A key/value dictionary for string keys.
Definition: RadixTree.h:45
Struct * __getNode()
Definition: Iterator.h:151
childlist_t m_Children
Definition: RadixTree.h:156
~RadixTree()
Definition: RadixTree.h:312
Node * doNext() const
Definition: RadixTree.h:820
Node * previous()
Definition: RadixTree.h:82
size_t count() const
Definition: RadixTree.h:341
void insert(const String &key, const T &value)
Definition: RadixTree.h:347
Node * findChild(const char *cpKey) const
Definition: RadixTree.h:702
Definition: String.h:49
Definition: Result.h:36
Node * getNextSibling() const
Definition: RadixTree.h:847
Key matched node key, and had extra characters.
Definition: RadixTree.h:58
Key matched node key exactly.
Definition: RadixTree.h:55
Iterator erase(Iterator iter)
Definition: RadixTree.h:222
Iterator::Const ConstIterator
Definition: RadixTree.h:187
void returnNode(Node *p)
Definition: RadixTree.h:276
Key didn&#39;t match node key at all.
Definition: RadixTree.h:56
Result< T, bool > lookup(const String &key) const
Definition: RadixTree.h:462
void dump(void(*emit_line)(const char *s)) const
Definition: RadixTree.h:668
const bool m_bCaseSensitive
Definition: RadixTree.h:293
Node ** Iterator
Definition: Vector.h:37
A subset of key matched the node key.
Definition: RadixTree.h:57
RadixTree & operator=(const RadixTree &x)
Definition: RadixTree.h:330
size_t m_nItems
Definition: RadixTree.h:289
void clear()
Definition: RadixTree.h:659
Node * m_pParent
Definition: RadixTree.h:158
void prependKey(const char *cpKey)
Definition: RadixTree.h:812
An iterator applicable for many data structures.
Definition: Iterator.h:43
Iterator begin()
Definition: RadixTree.h:233
void removeChild(Node *pChild)
Definition: RadixTree.h:736
const bool m_bCaseSensitive
Definition: RadixTree.h:160
RadixTree * m_pParentTree
Definition: RadixTree.h:163
Iterator end()
Definition: RadixTree.h:251
#define FATAL(text)
Definition: Log.h:89
Node * getFirstChild() const
Definition: RadixTree.h:806
void replaceChild(Node *pNodeOld, Node *pNodeNew)
Definition: RadixTree.h:723
void remove(const String &key)
Definition: RadixTree.h:511
Node * cloneNode(Node *node, Node *parent)
Definition: RadixTree.h:637
void addChild(Node *pNode)
Definition: RadixTree.h:716
ObjectPool< Node > m_NodePool
Definition: RadixTree.h:296
Node * next()
Definition: RadixTree.h:75