⟦0b9c876f5⟧

TextFile

// Implements the Patricia Trie ADT.  Includes operations for 
// * Initialization
// * Insertion
// * Retrieval

typedef void *CONTENTS; // This could change, depending on what client wants

/**********************************************************************/

class Patricia_Trie 
{
  
#define BIT(I) (HI_WORD - (I))  // Normalizes the ith Bit representation
#define BIT_MASK 07             // WORD_SHIFT low-order bits enabled
#define IS_BIT_SET(BLOCK,SHIFT) (((BLOCK) & (1 << (SHIFT))) != 0)
#define HI_WORD 7               // Hi-order bit, starting count from 0
#define NIL(TYPE) ((TYPE *)0)   // Silly little macro!
#define POW2(I) (1 << (I))      // i.e. 2^(I)
#define WORD_BITS 8             // Number of Bits in a Block
#define WORD_SHIFT 3            // i.e. lg (WORD_BITS) 
  
  /**********************************************************************/
  
private:
  
  /**********************************************************************/
  
  class Trie_Node 
    {                           // Nested class definition, but necessarily visible to clients
      
  friend class Patricia_Trie;
      
    private:
      
      char      *Key;               // Only works safely and easily for char *'s
      CONTENTS   Contents;              // Pointer to record Contents referenced by Key
      int        Branch_Bit;                // Stores index of next Bit to compare
      Trie_Node *Left;
      Trie_Node *Right;
      
      /**********************************************************************/
      
      Trie_Node (char *K = "", int Len = 0, CONTENTS New_Contents = 0, int B = 0):
      Branch_Bit (B), Contents (New_Contents) 
        {
          if (!Len)            // Dynamically compute the length, if it's not given
            Len = strlen (K) + 1;
          Key = new char[Len];
          strcpy (Key, K);
        }
      
      /**********************************************************************/
      
    public:
      
      /**********************************************************************/
      
      char *Return_Key (void)
        {                       // Returns the Key
          return (Key);
        }
      
      /**********************************************************************/
      
      CONTENTS Return_Contents (void)
        {                       // Pretty obvious, eh?
          return (Contents);
        }
      
    } *Root;                // Root for the entire Patricia tree, (actually a dummy header!)
  
  /**********************************************************************/
  
  void Dispose_Trie (Trie_Node *Root)
    {
      // Recursively free tree memory via modified post-order traversal
      
      if (Root->Left->Branch_Bit <= Root->Branch_Bit &&
          Root->Right->Branch_Bit <= Root->Branch_Bit)
        ;                       // do nothing!
      else if (Root->Left->Branch_Bit <= Root->Branch_Bit) 
        Dispose_Trie (Root->Right);
      else if (Root->Right->Branch_Bit <= Root->Branch_Bit) 
        Dispose_Trie (Root->Left);
      else
        {
          Dispose_Trie (Root->Left);
          Dispose_Trie (Root->Right);
        }
      delete Root;
    }
  
public:
  
  /**********************************************************************/
  
  Patricia_Trie (void)
    {                           // Initializes Patricia_Trie, using dummy header 
      Root        = new Trie_Node;
      Root->Left  = Root;
      Root->Right = Root;
    }
  
  /**********************************************************************/
  
  ~Patricia_Trie (void)
    { // Frees all dynamic memory in Patricia Trie
      Dispose_Trie (Root->Left);
      delete Root;
    }
  
  /**********************************************************************/
  
/*
  Attempts to locate the record associated with Key.  The basic
  algorithm is abstractly similar to Sedgewick's description in his
  Algorithm's book.  However, I've modified things to speed up the Bit
  comparisons greatly, as well as to run the Branch_Bit index from
  0..whatever, since this allows arbitrarily large keys (For some
  strange reason Sedgewick goes from some predefined limit,
  Max_Branch_Bit, *downto* 0!!).

  Most of the contortions below help manage a Bit cache, which
  reduces the total amount of work required to test the next Branch_Bit.
  Empirical tests revealed that this was main bottleneck in the Find
  routine.  To make the algorithm work I needed to have the first Bit in
  the first word always be 0.  Therefore, I've ordered the bits from
  high-order Bit to low-order Bit (e.g., 8 .. 1), running from word 0
  to word K.  This fits intuitively with how you might draw the bits onn
  paper, but *not* with how it would work if you programmed it in the
  normal way (i.e., running from bit 1 to 8, from word 0 to K).  At
  any rate, that's what the BIT macro is for, it normalizes my abstract
  concept of bit-order with the actual bit-order. */

  Trie_Node *Find (char *Key)
    {
      
      Trie_Node *Parent;
      Trie_Node *Cur   = Root->Left; // Root is *always* a dummy node, skip it
      char      *Block = Key;        // Pointer to Curent Block of Bits
      int        Lower = 0;
      
      do
        {
          Parent  = Cur;   
          int Bit = Cur->Branch_Bit - Lower;
          if (Bit >= WORD_BITS)
            {                   // Oh well, time to refill the Bit cache!
              
              while (Lower + WORD_BITS <= Cur->Branch_Bit)
                {
                  Lower += WORD_BITS;
                  ++Block;
                  Bit -= WORD_BITS;
                }                   // This loop gets executed infrequently, in general
              
            }
          Cur = (IS_BIT_SET (*Block, BIT (Bit)) ? Cur->Right : Cur->Left);
        }
      while (Parent->Branch_Bit < Cur->Branch_Bit);
      
      return Cur;     // Let calling routine worry whether Keys matched!
    }

   /**********************************************************************/
   
  void Insert (char *Key, CONTENTS Contents, int Key_Len = 0)
    {
      Trie_Node *Parent;
      Trie_Node *Cur   = Root;
      char      *Block = Key;
      int        Lower = 0;

      do
        {                       // This loop is essentially the same as in the Find routine.
          Parent  = Cur;   
          int Bit = Cur->Branch_Bit - Lower;
          if (Bit >= WORD_BITS)
            {
              while (Lower + WORD_BITS <= Cur->Branch_Bit)
                {
                  Lower += WORD_BITS;
                  ++Block;
                  Bit -= WORD_BITS;
                }
            }
          Cur = (IS_BIT_SET (*Block, BIT (Bit)) ? Cur->Right : Cur->Left);
        } 
      while (Parent->Branch_Bit < Cur->Branch_Bit);
      
      if (strcmp (Key, Cur->Key))
        {                       // Exclude duplicates
          char * Key_Block = Key;
          char * Cur_Block = Cur->Key;
          
          for (int First_Bit_Diff = 0; // Find the first word where Bits differ
               *Cur_Block == *Key_Block;  // Skip common prefixes
               First_Bit_Diff += WORD_BITS, Cur_Block++, Key_Block++);
          
          for (int Bit = *Cur_Block ^ *Key_Block; // Now, find location of that Bit
               ! (Bit & POW2 (HI_WORD));            // xor does us a favor here
               Bit <<= 1, First_Bit_Diff++);
          
          if (Parent->Branch_Bit > First_Bit_Diff)
            {                   // *Not* adding at a leaf node
              
              // This is almost identical to the original Find above, however, we are
              // guaranteed to end up at an internal node, rather than a leaf.
              
              for (Cur = Root, Lower = 0, Cur_Block = Key; ;)
                {
                  Parent  = Cur;   
                  int Bit = Cur->Branch_Bit - Lower;
                  if (Bit >= WORD_BITS)
                    {
                      
                      while (Lower + WORD_BITS <= Cur->Branch_Bit)
                        {
                          Lower += WORD_BITS;
                          ++Cur_Block;
                          Bit -= WORD_BITS;
                        }
                      
                    }
                  Cur = (IS_BIT_SET (*Cur_Block, BIT (Bit)) ? Cur->Right : Cur->Left);
                  if (Cur->Branch_Bit >= First_Bit_Diff) 
                    break;
                }
            }
          
          Trie_Node *T = new Trie_Node (Key, Key_Len, Contents, First_Bit_Diff);
          
          // Key_Block was saved from above, this avoids some costly recomputation.
          if (IS_BIT_SET (*Key_Block, BIT (First_Bit_Diff & BIT_MASK)))
            {
              T->Left  = Cur; 
              T->Right = T;
            }
          else
            {
              T->Left  = T; 
              T->Right = Cur;
            }
          
          // Determine whether the new node goes to the left or right of its parent
          Block = (Key) + (Parent->Branch_Bit >> WORD_SHIFT);
          if (IS_BIT_SET (*Block, BIT (Parent->Branch_Bit & BIT_MASK))) 
            Parent->Right = T;
          else 
            Parent->Left = T;
      }
   }

// Zap all those ugly #defines 

#undef BIT
#undef BIT_MASK
#undef HI_WORD
#undef IS_BIT_SET
#undef NIL
#undef POW2
#undef WORD_BITS
#undef WORD_SHIFT

}; // end of class Patricia_Trie
DataMuseum.dk

DKUUG/EUUG Conference tapes

⟦0b9c876f5⟧ TextFile

Derivation

TextFile