//****************************** // Written by Peter Golde // Copyright (c) 2004-2005, Wintellect // // Use and restribution of this code is subject to the license agreement // contained in the file "License.txt" accompanying this file. //****************************** using System; using System.Diagnostics; using System.Collections.Generic; using System.Runtime.Serialization; namespace Wintellect.PowerCollections { /// /// The base implementation for various collections classes that use hash tables /// as part of their implementation. This class should not (and can not) be /// used directly by end users; it's only for internal use by the collections package. The Hash /// does not handle duplicate values. /// /// /// The Hash manages items of type T, and uses a IComparer<ItemTYpe> that /// hashes compares items to hash items into the table. /// [Serializable] internal class Hash : IEnumerable, ISerializable, IDeserializationCallback { // NOTE: If you add new member variables, you very well may need to change the serialization // code to serialize that member. private IEqualityComparer equalityComparer; // interface for comparing elements private int count; // The count of elements in the table. private int usedSlots; // Includes real elements and deleted elements with the collision bit on. Used to determine // when we need to resize. private int totalSlots; // Size of the table. Always a power of two. private float loadFactor; // maximal load factor for the table. private int thresholdGrow; // floor(totalSlots * loadFactor); private int thresholdShrink; // thresholdGrow / 3. private int hashMask; // Mask to convert hash values to the size of the table. private int secondaryShift; // Shift to get the secondary skip value. private Slot[] table; // The hash table. private int changeStamp; // An integer that is changed every time the table structurally changes. // Used so that enumerations throw an exception if the tree is changed // during enumeration. private const int MINSIZE = 16; // minimum number of slots. private SerializationInfo serializationInfo; // Info used during deserialization. /// /// The structure that has each slot in the hash table. Each slot has three parts: /// 1. The collision bit. Indicates whether some item visited this slot but had to /// keep looking because the slot was full. /// 2. 31-bit full hash value of the item. If zero, the slot is empty. /// 3. The item itself. /// struct Slot { private uint hash_collision; // Lower 31 bits: the hash value. Top bit: the collision bit. public T item; // The item. /// /// The full hash value associated with the value in this slot, or zero /// if the slot is empty. /// public int HashValue { get { return (int) (hash_collision & 0x7FFFFFFF); } set { Debug.Assert((value & 0x80000000) == 0); // make sure sign bit isn't set. hash_collision = (uint)value | (hash_collision & 0x80000000); } } /// /// Is this slot empty? /// public bool Empty { get { return HashValue == 0; } } /// /// Clear this slot, leaving the collision bit alone. /// public void Clear() { HashValue = 0; item = default(T); // Done to avoid keeping things alive that shouldn't be. } /// /// The "Collision" bit indicates that some value hit this slot and /// collided, so had to try another slot. /// public bool Collision { get { return (hash_collision & 0x80000000) != 0; } set { if (value) hash_collision |= 0x80000000; else hash_collision &= 0x7FFFFFFF; } } } /// /// Constructor. Create a new hash table. /// /// The comparer to use to compare items. public Hash(IEqualityComparer equalityComparer) { this.equalityComparer = equalityComparer; this.loadFactor = 0.70F; // default load factor. } /// /// Gets the current enumeration stamp. Call CheckEnumerationStamp later /// with this value to throw an exception if the hash table is changed. /// /// The current enumeration stamp. internal int GetEnumerationStamp() { return changeStamp; } /// /// Must be called whenever there is a structural change in the tree. Causes /// changeStamp to be changed, which causes any in-progress enumerations /// to throw exceptions. /// internal void StopEnumerations() { ++changeStamp; } /// /// Checks the given stamp against the current change stamp. If different, the /// collection has changed during enumeration and an InvalidOperationException /// must be thrown /// /// changeStamp at the start of the enumeration. internal void CheckEnumerationStamp(int startStamp) { if (startStamp != changeStamp) { throw new InvalidOperationException(Strings.ChangeDuringEnumeration); } } /// /// Gets the full hash code for an item. /// /// Item to get hash code for. /// The full hash code. It is never zero. private int GetFullHash(T item) { uint hash; hash = (uint)Util.GetHashCode(item, equalityComparer); // The .NET framework tends to produce pretty bad hash codes. // Scramble them up to be much more random! hash += ~(hash << 15); hash ^= (hash >> 10); hash += (hash << 3); hash ^= (hash >> 6); hash += ~(hash << 11); hash ^= (hash >> 16); hash &= 0x7FFFFFFF; if (hash == 0) hash = 0x7FFFFFFF; // Make sure it isn't zero. return (int)hash; } /// /// Get the initial bucket number and skip amount from the full hash value. /// /// The full hash value. /// Returns the initial bucket. Always in the range 0..(totalSlots - 1). /// Returns the skip values. Always odd in the range 0..(totalSlots - 1). private void GetHashValuesFromFullHash(int hash, out int initialBucket, out int skip) { initialBucket = hash & hashMask; // The skip value must be relatively prime to the table size. Since the table size is a // power of two, any odd number is relatively prime, so oring in 1 will do it. skip = ((hash >> secondaryShift) & hashMask) | 1; } /// /// Gets the full hash value, initial bucket number, and skip amount for an item. /// /// Item to get hash value of. /// Returns the initial bucket. Always in the range 0..(totalSlots - 1). /// Returns the skip values. Always odd in the range 0..(totalSlots - 1). /// The full hash value. This is never zero. private int GetHashValues(T item, out int initialBucket, out int skip) { int hash = GetFullHash(item); GetHashValuesFromFullHash(hash, out initialBucket, out skip); return hash; } /// /// Make sure there are enough slots in the hash table that /// items can be inserted into the table. /// /// Number of additional items we are inserting. private void EnsureEnoughSlots(int additionalItems) { StopEnumerations(); if (usedSlots + additionalItems > thresholdGrow) { // We need to expand the table. Figure out to what size. int newSize; newSize = Math.Max(totalSlots, MINSIZE); while ((int)(newSize * loadFactor) < usedSlots + additionalItems) { newSize *= 2; if (newSize <= 0) { // Must have overflowed the size of an int. Hard to believe we didn't run out of memory first. throw new InvalidOperationException(Strings.CollectionTooLarge); } } ResizeTable(newSize); } } /// /// Check if the number of items in the table is small enough that /// we should shrink the table again. /// private void ShrinkIfNeeded() { if (count < thresholdShrink) { int newSize; if (count > 0) { newSize = MINSIZE; while ((int)(newSize * loadFactor) < count) newSize *= 2; } else { // We've removed all the elements. Shrink to zero. newSize = 0; } ResizeTable(newSize); } } /// /// Given the size of a hash table, compute the "secondary shift" value -- the shift /// that is used to determine the skip amount for collision resolution. /// /// The new size of the table. /// The secondary skip amount. private int GetSecondaryShift(int newSize) { int x = newSize - 2; // x is of the form 0000111110 -- a single string of 1's followed by a single zero. int secondaryShift = 0; // Keep shifting x until it is the set of bits we want to extract: it be the highest bits possible, // but can't overflow into the sign bit. while ((x & 0x40000000) == 0) { x <<= 1; ++secondaryShift; } return secondaryShift; } /// /// Resize the hash table to the given new size, moving all items into the /// new hash table. /// /// The new size of the hash table. Must be a power /// of two. private void ResizeTable(int newSize) { Slot[] oldTable = table; // Move all the items from this table to the new table. Debug.Assert((newSize & (newSize - 1)) == 0); // Check newSize is a power of two. totalSlots = newSize; thresholdGrow = (int)(totalSlots * loadFactor); thresholdShrink = thresholdGrow / 3; if (thresholdShrink <= MINSIZE) thresholdShrink = 1; hashMask = newSize - 1; secondaryShift = GetSecondaryShift(newSize); if (totalSlots > 0) table = new Slot[totalSlots]; else table = null; if (oldTable != null && table != null) { foreach (Slot oldSlot in oldTable) { int hash, bucket, skip; hash = oldSlot.HashValue; GetHashValuesFromFullHash(hash, out bucket, out skip); // Find an empty bucket. while (! table[bucket].Empty) { // The slot is used, but isn't our item. Set the collision bit and keep looking. table[bucket].Collision = true; bucket = (bucket + skip) & hashMask; } // We found an empty bucket. table[bucket].HashValue = hash; table[bucket].item = oldSlot.item; } } usedSlots = count; // no deleted elements have the collision bit on now. } /// /// Get the number of items in the hash table. /// /// The number of items stored in the hash table. public int ElementCount { get { return count; } } /// /// Get the number of slots in the hash table. Exposed internally /// for testing purposes. /// /// The number of slots in the hash table. internal int SlotCount { get { return totalSlots; } } /// /// Get or change the load factor. Changing the load factor may cause /// the size of the table to grow or shrink accordingly. /// /// public float LoadFactor { get { return loadFactor; } set { // Don't allow hopelessly inefficient load factors. if (value < 0.25 || value > 0.95) throw new ArgumentOutOfRangeException("value", value, Strings.InvalidLoadFactor); StopEnumerations(); bool maybeExpand = value < loadFactor; // May need to expand or shrink the table -- which? // Update loadFactor and thresholds. loadFactor = value; thresholdGrow = (int)(totalSlots * loadFactor); thresholdShrink = thresholdGrow / 3; if (thresholdShrink <= MINSIZE) thresholdShrink = 1; // Possibly expand or shrink the table. if (maybeExpand) EnsureEnoughSlots(0); else ShrinkIfNeeded(); } } /// /// Insert a new item into the hash table. If a duplicate item exists, can replace or /// do nothing. /// /// The item to insert. /// If true, duplicate items are replaced. If false, nothing /// is done if a duplicate already exists. /// If a duplicate was found, returns it (whether replaced or not). /// True if no duplicate existed, false if a duplicate was found. public bool Insert(T item, bool replaceOnDuplicate, out T previous) { int hash, bucket, skip; int emptyBucket = -1; // If >= 0, an empty bucket we can use for a true insert bool duplicateMightExist = true; // If true, still the possibility that a duplicate exists. EnsureEnoughSlots(1); // Ensure enough room to insert. Also stops enumerations. hash = GetHashValues(item, out bucket, out skip); for (;;) { if (table[bucket].Empty) { // Record the location of the first empty bucket seen. This is where the item will // go if no duplicate exists. if (emptyBucket == -1) emptyBucket = bucket; if (!duplicateMightExist || !table[bucket].Collision) { // There can't be a duplicate further on, because a bucket with the collision bit // clear was found (here or earlier). We have the place to insert. break; } } else if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, item)) { // We found a duplicate item. Replace it if requested to. previous = table[bucket].item; if (replaceOnDuplicate) table[bucket].item = item; return false; } else { // The slot is used, but isn't our item. if (!table[bucket].Collision) { // Since the collision bit is off, we can't have a duplicate. if (emptyBucket >= 0) { // We already have an empty bucket to use. break; } else { // Keep searching for an empty bucket to place the item. table[bucket].Collision = true; duplicateMightExist = false; } } } bucket = (bucket + skip) & hashMask; } // We found an empty bucket. Insert the new item. table[emptyBucket].HashValue = hash; table[emptyBucket].item = item; ++count; if (!table[emptyBucket].Collision) ++usedSlots; previous = default(T); return true; } /// /// Deletes an item from the hash table. /// /// Item to search for and delete. /// If true returned, the actual item stored in the hash table (must be /// equal to , but may not be identical. /// True if item was found and deleted, false if item wasn't found. public bool Delete(T item, out T itemDeleted) { int hash, bucket, skip; StopEnumerations(); if (count == 0) { itemDeleted = default(T); return false; } hash = GetHashValues(item, out bucket, out skip); for (; ; ) { if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, item)) { // Found the item. Remove it. itemDeleted = table[bucket].item; table[bucket].Clear(); --count; if (!table[bucket].Collision) --usedSlots; ShrinkIfNeeded(); return true; } else if (!table[bucket].Collision) { // No collision bit, so we can stop searching. No such element. itemDeleted = default(T); return false; } bucket = (bucket + skip) & hashMask; } } /// /// Find an item in the hash table. If found, optionally replace it with the /// finding item. /// /// Item to find. /// If true, replaces the equal item in the hash table /// with . /// Returns the equal item found in the table, if true was returned. /// True if the item was found, false otherwise. public bool Find(T find, bool replace, out T item) { int hash, bucket, skip; if (count == 0) { item = default(T); return false; } hash = GetHashValues(find, out bucket, out skip); for (; ; ) { if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, find)) { // Found the item. item = table[bucket].item; if (replace) table[bucket].item = find; return true; } else if (!table[bucket].Collision) { // No collision bit, so we can stop searching. No such element. item = default(T); return false; } bucket = (bucket + skip) & hashMask; } } /// /// Enumerate all of the items in the hash table. The items /// are enumerated in a haphazard, unpredictable order. /// /// An IEnumerator<T> that enumerates the items /// in the hash table. public IEnumerator GetEnumerator() { if (count > 0) { int startStamp = changeStamp; foreach (Slot slot in table) { if (!slot.Empty) { yield return slot.item; CheckEnumerationStamp(startStamp); } } } } /// /// Enumerate all of the items in the hash table. The items /// are enumerated in a haphazard, unpredictable order. /// /// An IEnumerator that enumerates the items /// in the hash table. System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } /// /// Creates a clone of this hash table. /// /// If non-null, this function is applied to each item when cloning. It must be the /// case that this function does not modify the hash code or equality function. /// A shallow clone that contains the same items. public Hash Clone(Converter cloneItem) { Hash clone = new Hash(equalityComparer); clone.count = this.count; clone.usedSlots = this.usedSlots; clone.totalSlots = this.totalSlots; clone.loadFactor = this.loadFactor; clone.thresholdGrow = this.thresholdGrow; clone.thresholdShrink = this.thresholdShrink; clone.hashMask = this.hashMask; clone.secondaryShift = this.secondaryShift; if (table != null) { clone.table = (Slot[])table.Clone(); if (cloneItem != null) { for (int i = 0; i < table.Length; ++i) { if (!table[i].Empty) table[i].item = cloneItem(table[i].item); } } } return clone; } #region Serialization /// /// Serialize the hash table. Called from the serialization infrastructure. /// void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context) { if (info == null) throw new ArgumentNullException("info"); info.AddValue("equalityComparer", equalityComparer, typeof(IEqualityComparer)); info.AddValue("loadFactor", loadFactor, typeof(float)); T[] items = new T[count]; int i = 0; foreach (Slot slot in table) if (! slot.Empty) items[i++] = slot.item; info.AddValue("items", items, typeof(T[])); } /// /// Called on deserialization. We cannot deserialize now, because hash codes /// might not be correct now. We do real deserialization in the OnDeserialization call. /// protected Hash(SerializationInfo serInfo, StreamingContext context) { // Save away the serialization info for use later. We can't be sure of hash codes // being stable until the entire object graph is deserialized, so we wait until then // to deserialize. serializationInfo = serInfo; } /// /// Deserialize the hash table. Called from the serialization infrastructure when /// the object graph has finished deserializing. /// void IDeserializationCallback.OnDeserialization(object sender) { if (serializationInfo == null) return; loadFactor = serializationInfo.GetSingle("loadFactor"); equalityComparer = (IEqualityComparer) serializationInfo.GetValue("equalityComparer", typeof(IEqualityComparer)); T[] items = (T[])serializationInfo.GetValue("items", typeof(T[])); T dummy; EnsureEnoughSlots(items.Length); foreach (T item in items) Insert(item, true, out dummy); serializationInfo = null; } #endregion Serialization #if DEBUG /// /// Print out basic stats about the hash table. /// internal void PrintStats() { Console.WriteLine("count={0} usedSlots={1} totalSlots={2}", count, usedSlots, totalSlots); Console.WriteLine("loadFactor={0} thresholdGrow={1} thresholdShrink={2}", loadFactor, thresholdGrow, thresholdShrink); Console.WriteLine("hashMask={0:X} secondaryShift={1}", hashMask, secondaryShift); Console.WriteLine(); } /// /// Print out the state of the hash table and each of the slots. Each slot looks like: /// Slot 4: C 4513e41e hello /// where the "C" indicates the collision bit is on /// the next hex number is the hash value /// followed by ToString() on the item. /// internal void Print() { PrintStats(); for (int i = 0; i < totalSlots; ++i) Console.WriteLine("Slot {0,4:X}: {1} {2,8:X} {3}", i, table[i].Collision ? "C" : " ", table[i].HashValue, table[i].Empty ? "" : table[i].item.ToString()); Console.WriteLine(); } /// /// Check that everything appears to be OK in the hash table. /// internal void Validate() { Debug.Assert(count <= usedSlots); Debug.Assert(count <= totalSlots); Debug.Assert(usedSlots <= totalSlots); Debug.Assert(usedSlots <= thresholdGrow); Debug.Assert((int)(totalSlots * loadFactor) == thresholdGrow); if (thresholdShrink > 1) Debug.Assert(thresholdGrow / 3 == thresholdShrink); else Debug.Assert(thresholdGrow / 3 <= MINSIZE); if (totalSlots > 0) { Debug.Assert((totalSlots & (totalSlots - 1)) == 0); // totalSlots is a power of two. Debug.Assert(totalSlots - 1 == hashMask); Debug.Assert(GetSecondaryShift(totalSlots) == secondaryShift); Debug.Assert(totalSlots == table.Length); } // Traverse the table. Make sure that count and usedSlots are right, and that // each slot looks reasonable. int expectedCount = 0, expectedUsed = 0, initialBucket, skip; if (table != null) { for (int i = 0; i < totalSlots; ++i) { Slot slot = table[i]; if (slot.Empty) { // Empty slot if (slot.Collision) ++expectedUsed; Debug.Assert(object.Equals(default(T), slot.item)); } else { // not empty. ++expectedCount; ++expectedUsed; Debug.Assert(slot.HashValue != 0); Debug.Assert(GetHashValues(slot.item, out initialBucket, out skip) == slot.HashValue); if (initialBucket != i) Debug.Assert(table[initialBucket].Collision); } } } Debug.Assert(expectedCount == count); Debug.Assert(expectedUsed == usedSlots); } #endif //DEBUG } }