|
DataMuseum.dkPresents historical artifacts from the history of: DKUUG/EUUG Conference tapes |
This is an automatic "excavation" of a thematic subset of
See our Wiki for more about DKUUG/EUUG Conference tapes Excavated with: AutoArchaeologist - Free & Open Source Software. |
top - metrics - downloadIndex: P T
Length: 9410 (0x24c2) Types: TextFile Names: »Patricia.h«
└─⟦a05ed705a⟧ Bits:30007078 DKUUG GNU 2/12/89 └─⟦cc8755de2⟧ »./libg++-1.36.1.tar.Z« └─⟦23757c458⟧ └─⟦this⟧ »libg++/etc/Patricia.h«
// Implements the Patricia Trie ADT. Includes operations for // * Initialization // * Insertion // * Retrieval typedef void *CONTENTS; // This could change, depending on what client wants /**********************************************************************/ class Patricia_Trie { #define BIT(I) (HI_WORD - (I)) // Normalizes the ith Bit representation #define BIT_MASK 07 // WORD_SHIFT low-order bits enabled #define IS_BIT_SET(BLOCK,SHIFT) (((BLOCK) & (1 << (SHIFT))) != 0) #define HI_WORD 7 // Hi-order bit, starting count from 0 #define NIL(TYPE) ((TYPE *)0) // Silly little macro! #define POW2(I) (1 << (I)) // i.e. 2^(I) #define WORD_BITS 8 // Number of Bits in a Block #define WORD_SHIFT 3 // i.e. lg (WORD_BITS) /**********************************************************************/ private: /**********************************************************************/ class Trie_Node { // Nested class definition, but necessarily visible to clients friend class Patricia_Trie; private: char *Key; // Only works safely and easily for char *'s CONTENTS Contents; // Pointer to record Contents referenced by Key int Branch_Bit; // Stores index of next Bit to compare Trie_Node *Left; Trie_Node *Right; /**********************************************************************/ Trie_Node (char *K = "", int Len = 0, CONTENTS New_Contents = 0, int B = 0): Branch_Bit (B), Contents (New_Contents) { if (!Len) // Dynamically compute the length, if it's not given Len = strlen (K) + 1; Key = new char[Len]; strcpy (Key, K); } /**********************************************************************/ public: /**********************************************************************/ char *Return_Key (void) { // Returns the Key return (Key); } /**********************************************************************/ CONTENTS Return_Contents (void) { // Pretty obvious, eh? return (Contents); } } *Root; // Root for the entire Patricia tree, (actually a dummy header!) /**********************************************************************/ void Dispose_Trie (Trie_Node *Root) { // Recursively free tree memory via modified post-order traversal if (Root->Left->Branch_Bit <= Root->Branch_Bit && Root->Right->Branch_Bit <= Root->Branch_Bit) ; // do nothing! else if (Root->Left->Branch_Bit <= Root->Branch_Bit) Dispose_Trie (Root->Right); else if (Root->Right->Branch_Bit <= Root->Branch_Bit) Dispose_Trie (Root->Left); else { Dispose_Trie (Root->Left); Dispose_Trie (Root->Right); } delete Root; } public: /**********************************************************************/ Patricia_Trie (void) { // Initializes Patricia_Trie, using dummy header Root = new Trie_Node; Root->Left = Root; Root->Right = Root; } /**********************************************************************/ ~Patricia_Trie (void) { // Frees all dynamic memory in Patricia Trie Dispose_Trie (Root->Left); delete Root; } /**********************************************************************/ /* Attempts to locate the record associated with Key. The basic algorithm is abstractly similar to Sedgewick's description in his Algorithm's book. However, I've modified things to speed up the Bit comparisons greatly, as well as to run the Branch_Bit index from 0..whatever, since this allows arbitrarily large keys (For some strange reason Sedgewick goes from some predefined limit, Max_Branch_Bit, *downto* 0!!). Most of the contortions below help manage a Bit cache, which reduces the total amount of work required to test the next Branch_Bit. Empirical tests revealed that this was main bottleneck in the Find routine. To make the algorithm work I needed to have the first Bit in the first word always be 0. Therefore, I've ordered the bits from high-order Bit to low-order Bit (e.g., 8 .. 1), running from word 0 to word K. This fits intuitively with how you might draw the bits onn paper, but *not* with how it would work if you programmed it in the normal way (i.e., running from bit 1 to 8, from word 0 to K). At any rate, that's what the BIT macro is for, it normalizes my abstract concept of bit-order with the actual bit-order. */ Trie_Node *Find (char *Key) { Trie_Node *Parent; Trie_Node *Cur = Root->Left; // Root is *always* a dummy node, skip it char *Block = Key; // Pointer to Curent Block of Bits int Lower = 0; do { Parent = Cur; int Bit = Cur->Branch_Bit - Lower; if (Bit >= WORD_BITS) { // Oh well, time to refill the Bit cache! while (Lower + WORD_BITS <= Cur->Branch_Bit) { Lower += WORD_BITS; ++Block; Bit -= WORD_BITS; } // This loop gets executed infrequently, in general } Cur = (IS_BIT_SET (*Block, BIT (Bit)) ? Cur->Right : Cur->Left); } while (Parent->Branch_Bit < Cur->Branch_Bit); return Cur; // Let calling routine worry whether Keys matched! } /**********************************************************************/ void Insert (char *Key, CONTENTS Contents, int Key_Len = 0) { Trie_Node *Parent; Trie_Node *Cur = Root; char *Block = Key; int Lower = 0; do { // This loop is essentially the same as in the Find routine. Parent = Cur; int Bit = Cur->Branch_Bit - Lower; if (Bit >= WORD_BITS) { while (Lower + WORD_BITS <= Cur->Branch_Bit) { Lower += WORD_BITS; ++Block; Bit -= WORD_BITS; } } Cur = (IS_BIT_SET (*Block, BIT (Bit)) ? Cur->Right : Cur->Left); } while (Parent->Branch_Bit < Cur->Branch_Bit); if (strcmp (Key, Cur->Key)) { // Exclude duplicates char * Key_Block = Key; char * Cur_Block = Cur->Key; for (int First_Bit_Diff = 0; // Find the first word where Bits differ *Cur_Block == *Key_Block; // Skip common prefixes First_Bit_Diff += WORD_BITS, Cur_Block++, Key_Block++); for (int Bit = *Cur_Block ^ *Key_Block; // Now, find location of that Bit ! (Bit & POW2 (HI_WORD)); // xor does us a favor here Bit <<= 1, First_Bit_Diff++); if (Parent->Branch_Bit > First_Bit_Diff) { // *Not* adding at a leaf node // This is almost identical to the original Find above, however, we are // guaranteed to end up at an internal node, rather than a leaf. for (Cur = Root, Lower = 0, Cur_Block = Key; ;) { Parent = Cur; int Bit = Cur->Branch_Bit - Lower; if (Bit >= WORD_BITS) { while (Lower + WORD_BITS <= Cur->Branch_Bit) { Lower += WORD_BITS; ++Cur_Block; Bit -= WORD_BITS; } } Cur = (IS_BIT_SET (*Cur_Block, BIT (Bit)) ? Cur->Right : Cur->Left); if (Cur->Branch_Bit >= First_Bit_Diff) break; } } Trie_Node *T = new Trie_Node (Key, Key_Len, Contents, First_Bit_Diff); // Key_Block was saved from above, this avoids some costly recomputation. if (IS_BIT_SET (*Key_Block, BIT (First_Bit_Diff & BIT_MASK))) { T->Left = Cur; T->Right = T; } else { T->Left = T; T->Right = Cur; } // Determine whether the new node goes to the left or right of its parent Block = (Key) + (Parent->Branch_Bit >> WORD_SHIFT); if (IS_BIT_SET (*Block, BIT (Parent->Branch_Bit & BIT_MASK))) Parent->Right = T; else Parent->Left = T; } } // Zap all those ugly #defines #undef BIT #undef BIT_MASK #undef HI_WORD #undef IS_BIT_SET #undef NIL #undef POW2 #undef WORD_BITS #undef WORD_SHIFT }; // end of class Patricia_Trie