Private Enum eSearchMethod
StrictWord = 1
QuickWord = 2
Phonetic = 4
Exhaustive = 7 'DO ALL OF THEM!
End Enum
Private Function quasiString(Str1 As String, Str2 As String, Optional Confidence1, Optional Confidence2, Optional Threshold As Double = 0.666, Optional Absolute As Boolean = False, Optional CmpMethod As eSearchMethod, Optional PhoneticThreshold As Long = 1) As Boolean
'accepts two strings to compare: Str1, Str2
'returns confidence (optional)
'use a specific threshold, default 66.6% = 2/3rds
'if Absolute, then both confidence MUST be 1(100%) ("American Dad", "American Idol") problems...
'if textual comparisons fail, Phonetic MAY(additonally optionally) performed,...
Dim asStr() As String ''1.a' substrs
Dim bsStr() As String ''2.b' substrs I don't like multi-dimension arrays!
Dim cmpStrA As Variant
Dim cmpStrB As Variant
Dim WordMatches(1) As Long 'how many matches
Dim WordCompares(1) As Long 'how many searches/comparisons
Dim C As Long
Dim bSideGood(1) As Boolean 'is the character next to the quickword term 'space'?
Dim Confidence(1) As Double
If Len(Str1) And Len(Str2) Then
'prepare the substr arrays
If InStr(Str1, " ") > 0 Then
asStr = Split(Str1, " ")
Else
ReDim asStr(0)
asStr(0) = Str1
End If
'and the other string
If InStr(Str2, " ") > 0 Then
bsStr = Split(Str2, " ")
Else
ReDim bsStr(0)
bsStr(0) = Str2
End If
If (CmpMethod And StrictWord) = StrictWord Then
For Each cmpStrB In bsStr
For Each cmpStrA In asStr
'iterate the total
WordCompares(0) = WordCompares(0) + 1
WordCompares(1) = WordCompares(1) + 1
If LCase$(cmpStrA) = LCase$(cmpStrB) Then
'store/iterate to calc hit %
WordMatches(0) = WordMatches(0) + 1
WordMatches(1) = WordMatches(1) + 1
Exit For
ElseIf (CmpMethod And Phonetic) = Phonetic Then
If ComparePhonetic(cmpStrA, cmpStrB) <= PhoneticThreshold Then
WordMatches(0) = WordMatches(0) + 1
WordMatches(1) = WordMatches(1) + 1
Exit For
End If
End If
Next cmpStrA
Next cmpStrB
End If
cmpStrA = ""
cmpStrB = ""
If (CmpMethod And quickWord) = quickWord Then
For Each cmpStrA In asStr
C = InStr(Str2, cmpStrA)
If C > 0 Then
'check left side for space/nothing
If C = 1 Then
bSideGood(0) = True
ElseIf Mid$(Str2, C - 1, 1) = " " Then
bSideGood(0) = True
End If
'check right side for space/nothing
If C + Len(cmpStrA) >= Len(Str2) Then
bSideGood(1) = True
ElseIf Mid$(Str2, C + Len(cmpStrA), 1) = " " Then
bSideGood(1) = True
End If
If bSideGood(0) And bSideGood(1) Then WordMatches(0) = WordMatches(0) + 1
End If
WordCompares(0) = WordCompares(0) + 1
bSideGood(0) = False
bSideGood(1) = False
Next cmpStrA
For Each cmpStrA In bsStr
C = InStr(Str1, cmpStrA)
If C > 0 Then
'check the left side
If C = 1 Then
bSideGood(0) = True
ElseIf Mid$(Str1, C - 1, 1) = " " Then
bSideGood(0) = True
End If
'check right side
If C + Len(cmpStrA) >= Len(Str1) Then
bSideGood(1) = True
ElseIf Mid$(Str1, C + Len(cmpStrA), 1) = " " Then
bSideGood(1) = True
End If
If bSideGood(0) And bSideGood(1) Then WordMatches(1) = WordMatches(1) + 1
End If
WordCompares(1) = WordCompares(1) + 1
bSideGood(0) = False
bSideGood(1) = False
Next cmpStrA
End If
Debug.Print "bleh"; WordMatches(0); WordCompares(0); WordMatches(1); WordCompares(1),
If (CmpMethod And Exhaustive) = Exhaustive Then
Confidence(0) = WordMatches(0) / WordCompares(0)
Confidence(1) = WordMatches(1) / WordCompares(1)
Else
Confidence(0) = WordMatches(0) / (UBound(bsStr) + 1)
Confidence(1) = WordMatches(1) / (UBound(asStr) + 1)
End If
If Confidence(0) > 1 Then
Confidence(1) = Confidence(1) / Confidence(0)
Confidence(0) = 1
ElseIf Confidence(1) > 1 Then
Confidence(0) = Confidence(0) / Confidence(1)
Confidence(1) = 1
End If
If Absolute And ((Confidence(0) = 1) And (Confidence(1) = 1)) Then
quasiString = True
ElseIf Not Absolute And (Confidence(0) > Threshold) And (Confidence(1) > Threshold) Then
quasiString = True
End If
If Not IsMissing(Confidence1) Then Confidence1 = Confidence(0)
If Not IsMissing(Confidence2) Then Confidence2 = Confidence(1)
End If
End Function
Private Function Soundex(argWord As String)
'don't call directly
Dim workStr As String, i As Long
'// Capitalize it to remove ambiguity
argWord = UCase$(argWord)
'// 1. Retain the first letter of the string
workStr = Left$(argWord, 1)
'// 2. Replacement
' [a, e, h, i, o, u, w, y] = 0
' [b, f, p, v] = 1
' [c, g, j, k, q, s, x, z] = 2
' [d, t] = 3
' [l] = 4
' [m, n] = 5
' [r] = 6
For i = 2 To Len(argWord)
Select Case Mid$(argWord, i, 1)
Case "B", "F", "P", "V"
workStr = workStr & Chr$(49) '// 1
Case "C", "G", "J", "K", "Q", "S", "X", "Z"
workStr = workStr & Chr$(50) '// 2
Case "D", "T"
workStr = workStr & Chr$(51) '// 3
Case "L"
workStr = workStr & Chr$(52) '// 4
Case "M", "N"
workStr = workStr & Chr$(53) '// 5
Case "R"
workStr = workStr & Chr$(56) '// 6
'// A, E, H, I, O, U, W, Y do nothing
End Select
Next i
'// 5. Return the first four bytes padded with 0.
If Len(workStr) > 4 Then
Soundex = workStr
Else
Soundex = workStr & Space$(4 - Len(workStr))
End If
End Function
'// Returns the Minimum of 3 numbers
Private Function min3(ByVal n1 As Long, ByVal n2 As Long, ByVal n3 As Long) As Long
'don't call directly
min3 = n1
If n2 < min3 Then min3 = n2
If n3 < min3 Then min3 = n3
End Function
'// Returns the Levenshtein Distance between 2 strings.
Private Function LevenshteinDistance(argStr1 As String, argStr2 As String) As Long
'don't call directly
Dim m As Long, n As Long
Dim editMatrix() As Long, i As Long, j As Long, cost As Long
Dim str1_i As String, str2_j As String
Dim p() As Long, q() As Long, r As Long
Dim X As Long, y As Long
n = Len(argStr1)
m = Len(argStr2)
'If (n = 0) Or (m = 0) Then Exit Function
ReDim editMatrix(n, m) As Long
For i = 0 To n
editMatrix(i, 0) = i
Next
For j = 0 To m
editMatrix(0, j) = j
Next
For i = 1 To n
str1_i = Mid$(argStr1, i, 1)
For j = 1 To m
str2_j = Mid$(argStr2, j, 1)
If str1_i = str2_j Then
cost = 0
Else
cost = 1
End If
editMatrix(i, j) = min3(editMatrix(i - 1, j) + 1, editMatrix(i, j - 1) + 1, editMatrix(i - 1, j - 1) + cost)
Next j
Next i
LevenshteinDistance = editMatrix(n, m)
Erase editMatrix
End Function
Private Function ComparePhonetic(ByVal inputStr1 As String, ByVal inputStr2 As String) As Long
'single interface function, use this
inputStr1 = Soundex(inputStr1)
inputStr2 = Soundex(inputStr2)
If inputStr1 = vbNullString Then
ComparePhonetic = Len(inputStr2)
ElseIf inputStr2 = vbNullString Then
ComparePhonetic = Len(inputStr1)
Else
ComparePhonetic = LevenshteinDistance(inputStr1, inputStr2)
End If
End Function