Dictionary

2 분 소요

C# Dictionary

C# Dictionary Reference Source : 출처 MicroSoft

Dictionary의 HashTable의 bucket 증가 수치

public static readonly int[] primes = {
    3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
    1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
    17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
    187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
    1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};

위와 같이 prime수로 bucket 크기를 늘려 나간다.

Dictionary의 중복키 예방

Insert

private void Insert(TKey key, TValue value, bool add)
{
    if (key == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets == null) Initialize(0);
    int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
    int targetBucket = hashCode % buckets.Length;

#if FEATURE_RANDOMIZED_STRING_HASHING
    int collisionCount = 0;
#endif

    for (int i = buckets[targetBucket]; i >= 0; i = entries[i].next)
    {
        if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key))
        {
            if (add)
            {
                ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_AddingDuplicate);
            }
            entries[i].value = value;
            version++;
            return;
        }

#if FEATURE_RANDOMIZED_STRING_HASHING
        collisionCount++;
#endif
    }
    int index;
    if (freeCount > 0)
    {
        index = freeList;
        freeList = entries[index].next;
        freeCount--;
    }
    else
    {
        if (count == entries.Length)
        {
            Resize();
            targetBucket = hashCode % buckets.Length;
        }
        index = count;
        count++;
    }

    entries[index].hashCode = hashCode;
    entries[index].next = buckets[targetBucket];
    entries[index].key = key;
    entries[index].value = value;
    buckets[targetBucket] = index;
    version++;

#if FEATURE_RANDOMIZED_STRING_HASHING
#if FEATURE_CORECLR
    // In case we hit the collision threshold we'll need to switch to the comparer which is using randomized string hashing
    // in this case will be EqualityComparer<string>.Default.
    // Note, randomized string hashing is turned on by default on coreclr so EqualityComparer<string>.Default will 
    // be using randomized string hashing

    if (collisionCount > HashHelpers.HashCollisionThreshold && comparer == NonRandomizedStringEqualityComparer.Default) 
    {
        comparer = (IEqualityComparer<TKey>) EqualityComparer<string>.Default;
        Resize(entries.Length, true);
    }
#else
    if(collisionCount > HashHelpers.HashCollisionThreshold && HashHelpers.IsWellKnownEqualityComparer(comparer)) 
    {
        comparer = (IEqualityComparer<TKey>) HashHelpers.GetRandomizedEqualityComparer(comparer);
        Resize(entries.Length, true);
    }
#endif // FEATURE_CORECLR 
#endif
}

buckets은 배열 index가 hash값, 이 배열의 값은 entries의 index임.

insert시 중복 된 키를 가질 경우 entry의 next로 연결리스트처럼 다음 중복 키를 가지는 데이터의 index를 지정해두어 탐색 할 수 있게 함.

Resize

private void Resize(int newSize, bool forceNewHashCodes)
{
    Contract.Assert(newSize >= entries.Length);
    int[] newBuckets = new int[newSize];
    for (int i = 0; i < newBuckets.Length; i++) newBuckets[i] = -1;
    Entry[] newEntries = new Entry[newSize];
    Array.Copy(entries, 0, newEntries, 0, count);
    if (forceNewHashCodes)
    {
        for (int i = 0; i < count; i++)
        {
            if (newEntries[i].hashCode != -1)
            {
                newEntries[i].hashCode = (comparer.GetHashCode(newEntries[i].key) & 0x7FFFFFFF);
            }
        }
    }
    for (int i = 0; i < count; i++)
    {
        if (newEntries[i].hashCode >= 0)
        {
            int bucket = newEntries[i].hashCode % newSize;
            newEntries[i].next = newBuckets[bucket];
            newBuckets[bucket] = i;
        }
    }
    buckets = newBuckets;
    entries = newEntries;
}

bucket의 크기를 늘릴 때마다 새롭게 중복 키에 대한 처리를 하는 것으로 보인다.

Dictionary의 용량이 커질 수록 Resize의 퍼포먼스가 안좋아 질 수 있으므로.. Dictionary초기화 시 capacity를 설정하는게 좋아보인다..(물론 prime number로!!)

Find

private int FindEntry(TKey key)
{
    if (key == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
    }

    if (buckets != null)
    {
        int hashCode = comparer.GetHashCode(key) & 0x7FFFFFFF;
        for (int i = buckets[hashCode % buckets.Length]; i >= 0; i = entries[i].next)
        {
            if (entries[i].hashCode == hashCode && comparer.Equals(entries[i].key, key)) return i;
        }
    }
    return -1;
}

find에서 entries의 next로 값을 찾는 것을 볼 수 있다.

태그:

카테고리:

업데이트: