Results 1 to 40 of 87

Thread: Simple and fast, lightweight HashList-Class (no APIs)

Threaded View

  1. #1

    Thread Starter
    PowerPoster
    Join Date
    Jun 2013
    Posts
    7,454

    Simple and fast, lightweight HashList-Class (no APIs)

    Not much to add to the Threads Title...

    the implementation of cHashList comes in only about 100 lines of code, and it can
    be used as a VB.Collection-compatible replacement with much better performance.

    The following Methods/Properties are supported:
    - CompareMode (to switch between case-insensitive and case-sensitive compares, default-behaviour is like the VB.Collection)
    - UniqueKeys (to allow multiple entries with the same Key, when switched from the True-defaultsetting to False)
    - Count
    - Add(Item As Variant, Optional Key As String)
    - Exists(Key As String)
    - IndexByKey(Key As String)
    - KeyByIndex(ByVal IndexOneBased As Long)
    - Item(KeyOrOneBasedIndex As Variant) ... (Get, Let and Set)
    - ReInit(Optional ByVal ExpectedItemCount As Long = 5000)

    Indexed access (for both, Keys and Items) is by orders of magnitude faster than the VB-Collection.
    What's possible now as well (compared to the VB-Collection) is the ability to overwrite Item-Values
    (at a given Index- or Key-position).

    Note, that in the above List a Remove-Method is missing -
    I've left this out for two reasons:
    1) to demonstrate that a simplified HashList-Implementation doesn't necessarily need to be a linked List
    2) because Remove is not used very often, when a Collection is used primarily as a fast "Key-Lookup-Container"
    ... (for Queue- or Stack-scenarios one can always use the normal VBA.Collection)

    Performance is about 6 times as fast, when Key-Value-pairs are added -
    and about twice as fast when Items are retrieved per Key-Lookup...

    Here's a ScreenShot:


    Here's the Class-Code of cHashList:
    Code:
    Option Explicit
     
    Private Type DataTableEntry
      Key As String
      Value As Variant
    End Type
    Private Type HashTableEntry
      DataIndexes() As Long
    End Type
     
    Private DataTable() As DataTableEntry, HashTable() As HashTableEntry
    Private mCount As Long, mDTUBound As Long, mHashTableSize As Long
     
    Public CompareMode As VbCompareMethod, UniqueKeys As Boolean
    
    Private Sub Class_Initialize()
      UniqueKeys = True
      CompareMode = vbTextCompare
      ReInit
    End Sub
    
    Public Sub ReInit(Optional ByVal ExpectedItemCount As Long = 5000)
      mHashTableSize = 8
      Do Until mHashTableSize * 5 > ExpectedItemCount: mHashTableSize = mHashTableSize * 2: Loop
      ReDim HashTable(0 To mHashTableSize - 1)
      
      Dim i As Long
      For i = 0 To UBound(HashTable): ReDim HashTable(i).DataIndexes(0 To 0): Next
      mDTUBound = 16: ReDim DataTable(0 To mDTUBound)
      mCount = 0
    End Sub
    
    Public Property Get Count() As Long
      Count = mCount
    End Property
    
    Public Function Exists(Key As String) As Boolean
      Exists = FindIndex(Key, CalcHash(Key)) > 0
    End Function
    Public Function IndexByKey(Key As String) As Long
      IndexByKey = FindIndex(Key, CalcHash(Key))
    End Function
    
    Public Sub Add(Item, Optional Key As String)
    Dim HashValue As Long, UB As Long
      HashValue = CalcHash(Key)
      If UniqueKeys Then If FindIndex(Key, HashValue) Then Err.Raise 457
      
      'prolong and add to the new entries to the DataTable-Array
      mCount = mCount + 1
      If mDTUBound < mCount Then mDTUBound = mDTUBound * 1.5: ReDim Preserve DataTable(0 To mDTUBound)
      DataTable(mCount).Key = Key
      DataTable(mCount).Value = Item
      
      'add the new DataIndex to the proper Hash-Buckets .DataIndexes-Array
      With HashTable(HashValue)
        UB = UBound(.DataIndexes): UB = UB + 1
        ReDim Preserve .DataIndexes(0 To UB)
        .DataIndexes(UB) = mCount
      End With
    End Sub
    
    Public Property Get KeyByIndex(ByVal IndexOneBased As Long)
      If IndexOneBased < 1 Or IndexOneBased > mCount Then Err.Raise 9
      KeyByIndex = DataTable(IndexOneBased).Key
    End Property
    
    Public Property Get Item(KeyOrOneBasedIndex)
    Dim Index As Long
      If VarType(KeyOrOneBasedIndex) = vbString Then
        Index = FindIndex(KeyOrOneBasedIndex, CalcHash(KeyOrOneBasedIndex))
        If Index = 0 Then Err.Raise 457
      Else
        Index = KeyOrOneBasedIndex
        If Index < 1 Or Index > mCount Then Err.Raise 9
      End If
      If IsObject(DataTable(Index).Value) Then
        Set Item = DataTable(Index).Value
      Else
        Item = DataTable(Index).Value
      End If
    End Property
    
    Public Property Let Item(KeyOrOneBasedIndex, NewItem)
    Dim Index As Long
      If VarType(KeyOrOneBasedIndex) = vbString Then
        Index = FindIndex(KeyOrOneBasedIndex, CalcHash(KeyOrOneBasedIndex))
        If Index = 0 Then Err.Raise 457
      Else
        Index = KeyOrOneBasedIndex
        If Index < 1 Or Index > mCount Then Err.Raise 9
      End If
      If IsObject(NewItem) Then
        Set DataTable(Index).Value = NewItem
      Else
        DataTable(Index).Value = NewItem
      End If
    End Property
    Public Property Set Item(KeyOrOneBasedIndex, NewItem)
      Item(KeyOrOneBasedIndex) = NewItem
    End Property
    
    Private Function FindIndex(Key, ByVal HashValue As Long) As Long
    Dim i As Long, CM As VbCompareMethod
      With HashTable(HashValue)
        CM = CompareMode
        For i = 1 To UBound(.DataIndexes)
          If StrComp(Key, DataTable(.DataIndexes(i)).Key, CM) = 0 Then
            FindIndex = .DataIndexes(i): Exit Function
          End If
        Next
      End With 'returns Zero, when no Key can be found
    End Function
    
    Private Function CalcHash(Key) As Long
    Dim i As Long, L As Long, B() As Byte
      If CompareMode Then B = LCase$(Key) Else B = Key
      L = 7919
        For i = UBound(B) To 0 Step -1: L = (i + B(i) + L) * 37 And &H7FFFFF: Next
      CalcHash = L * B(0) Mod mHashTableSize
    End Function
    
    Friend Sub CheckHashDistribution()
    Dim i As Long, UB As Long, cc As Long, Min As Long, Max As Long
      Min = &H7FFFFFFF
      For i = 0 To UBound(HashTable)
        UB = UBound(HashTable(i).DataIndexes)
        If UB Then
          If Min > UB Then Min = UB
          If Max < UB Then Max = UB
          cc = cc + 1
        End If
      Next
      Debug.Print "Distribution over a HashTable with"; UBound(HashTable) + 1; "slots:"
      Debug.Print "Used-HashSlots:"; cc
      Debug.Print "Min-Entries:"; Min
      Debug.Print "Max-Entries:"; Max
    End Sub
    And here the Code of the Test-Form:
    Code:
    Option Explicit
    
    Private Const TestEntryCount As Long = 100000
    
    Private C As Collection, H As cHashList
    
    Private Sub Form_Click()
    Dim i As Long, T!, Item
      AutoRedraw = True
      Cls
      Print "Count of Test-Entries:"; TestEntryCount; vbLf
      
      Set C = New Collection
      Set H = New cHashList
          H.ReInit TestEntryCount
          
      T = Timer
        For i = 1 To TestEntryCount
          C.Add i, "K" & i
        Next
      Print "Collection-Add:", Timer - T & "sec"
      
      T = Timer
        For i = 1 To TestEntryCount
          H.Add i, "K" & i
        Next
      Print "cHashList-Add:", Timer - T & "sec"; vbLf
      
      T = Timer
        For i = 1 To TestEntryCount
          Item = C.Item("K" & i)
        Next
      Print "Collection-ItemByKey:", Timer - T & "sec"
      
      T = Timer
        For i = 1 To TestEntryCount
          Item = H.Item("K" & i)
        Next
      Print "cHashList-ItemByKey:", Timer - T & "sec"
     
      Print vbLf; "Indexed access is not compared (it would be much faster per HashList)"
      H.CheckHashDistribution
    End Sub
    Have fun with it...

    Edit: Whilst the above Demo was more a "Concept-Showcase"... below is a performance-optimized version,
    acting more like the MS-Scripting-Dictionary now (with a Param-wise inversed Add-Method, but feel free
    to change it at will - the code-volume is still bearable with about 230 lines in
    cHashD).
    In the meantime I've also added support for Integer-, Double- and Object-Keys - but I think I'll leave it at that -
    since the Class is easy understandable and expandable...

    HashD.zip

    Performance of Case-Insensitive-TextcompareMode for Scripting-Dictionary and cHashD


    Performance of Binary-TextcompareModes for Scripting-Dictionary and cHashD


    Olaf
    Last edited by Schmidt; Sep 4th, 2016 at 12:07 PM.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  



Click Here to Expand Forum to Full Width