Private Sub Form_Load()
Dim strOriginal As String
Dim lngLeft As Long
Dim lngRight As Long
strOriginal = "Hi this is a test (which is nonsense), just to show you an example (lol)."
Do Until InStr(strOriginal, " (") = 0 'I add a space here before the parentheses to clear the leading space too. Because of it items like "data(asd)" wont be removed.
lngLeft = InStr(strOriginal, " (")
lngRight = InStr(lngLeft, strOriginal, ")") + 1
If lngRight > 0 Then
strOriginal = Replace(strOriginal, Mid$(strOriginal, lngLeft, lngRight - lngLeft), "")
Else
Exit Do
End If
Loop
MsgBox strOriginal
End Sub
Dim strInput As String
Dim strRet As String
Dim regEx As Object
strInput = "Hi this is a test (which is nonsense), just to show you an example (lol)."
Set regEx = CreateObject("VBScript.RegExp")
' Set the pattern to remove everything between ( and )
regEx.Pattern = "(\s\(.*?\))"
regEx.Global = True
' Remove everything between ( and )
strRet = regEx.Replace(strInput, "")
Debug.Print strRet
' Cleanup
Set regEx = Nothing
Private Function ReplaceParentheses(InString As String, Optional startPos As Long = 1) As String
Dim temp As String
ReplaceParentheses = InString
Do
startPos = InStr(startPos, ReplaceParentheses, "(") + 1
If startPos <= 1 Then
Exit Function
End If
temp = " (" & Mid$(ReplaceParentheses, startPos, InStr(startPos, ReplaceParentheses, ")") - startPos) & ")"
If InStrB(startPos, ReplaceParentheses, temp) = 0 Then
temp = Mid$(temp, 2)
End If
ReplaceParentheses = Replace(ReplaceParentheses, temp, vbNullString)
Loop
End Function
Code:
Before: Hi this is a test (which is nonsense), just to show you an example (lol).
After: Hi this is a test, just to show you an example.
Before: Hi this is a test(which is nonsense), just to show you an example(lol).
After: Hi this is a test, just to show you an example.
Private Function ReplaceParentheses(InString As String, Optional startPos As Long = 1) As String
Dim temp As String
ReplaceParentheses = InString
Do
startPos = InStr(startPos, ReplaceParentheses, "(") + 1
If startPos <= 1 Then
Exit Function
End If
temp = " (" & Mid$(ReplaceParentheses, startPos, InStr(startPos, ReplaceParentheses, ")") - startPos) & ")"
If InStrB(startPos, ReplaceParentheses, temp) = 0 Then
temp = Mid$(temp, 2)
End If
ReplaceParentheses = Replace(ReplaceParentheses, temp, vbNullString)
Loop
End Function
Does this really work? You are calling ReplaceRarentheses recursively but you aren't passing the required string.
EDIT: yes it does work. I have never seen it done that way.
Private Function ReplaceParentheses(InString As String, Optional startPos As Long = 1) As String
Dim temp As String, toReturn As String
toReturn = InString
Do
startPos = InStr(startPos, toReturn, "(") + 1
If startPos <= 1 Then
Exit Do
End If
temp = " (" & Mid$(toReturn, startPos, InStr(startPos, toReturn, ")") - startPos) & ")"
If InStrB(startPos, toReturn, temp) = 0 Then
temp = Mid$(temp, 2)
End If
toReturn = Replace(toReturn, temp, vbNullString)
Loop
ReplaceParentheses = toReturn
End Function
I'm not sure, just guessing, but I don't think that is a recursive call. If it was there would probably be no need for Do Loop, and, since he is not actually passing parameters along with it (at least InString which is mandatory), it means that it will just use it as a variable (return value), to which he assigned the value at the beginning with: ReplaceParentheses = InString
In case you're working with bigger strings or you have a lot to process:
Code:
Option Explicit
Public Function RemoveParentheses(ByVal Text As String, Optional ByVal Start As Long = 1) As String
Dim I As Long, I2 As Long, J As Long
Dim B As Boolean, L As Long: L = Len(Text)
Do
I = InStr(Start, Text, " (")
I2 = InStr(Start, Text, "(")
If I = 0 Then
I = I2
ElseIf I2 > 0 And I2 < I Then
I = I2
End If
J = InStr(I + 1, Text, ")")
B = I > 0 And J > I And I < L And J <= L
If B Then
Mid$(Text, I, L - J) = Mid$(Text, J + 1, L - J)
L = L - (J - I + 1)
Start = I
End If
Loop While B
RemoveParentheses = Left$(Text, L)
End Function
Private Sub Form_Load()
Debug.Print RemoveParentheses("Hi this is a test (which is nonsense), just to show you an example (lol).")
Debug.Print RemoveParentheses("Hi this is a test(which is nonsense), just to show you an example(lol).")
Debug.Print RemoveParentheses("(Test)A (Test) B (Test) C(Test) ")
End Sub
Happy figuring it out Can be optimized further, but would likely make code longer. Note that I did not use InStrB, so this is Unicode safe.
Version 2
Code:
Public Function RemoveParentheses2(ByVal Text As String, Optional ByVal Start As Long = 1) As String
Dim B As Boolean, I As Long, J As Long
Dim L As Long, S As Long: L = Len(Text): S = Start
Do
I = InStr(S, Text, " (")
B = I > 0 And I < L
If B Then
J = InStr(I + 1, Text, ")")
B = J > I And J <= L
If B Then
Mid$(Text, I, L - J) = Mid$(Text, J + 1, L - J)
L = L - (J - I + 1)
S = I
End If
End If
Loop While B
Do
I = InStr(Start, Text, "(")
B = I > 0 And I < L
If B Then
J = InStr(I + 1, Text, ")")
B = J > I And J <= L
If B Then
Mid$(Text, I, L - J) = Mid$(Text, J + 1, L - J)
L = L - (J - I + 1)
Start = I
End If
End If
Loop While B
RemoveParentheses2 = Left$(Text, L)
End Function
Last edited by Merri; Jun 5th, 2010 at 05:18 AM.
Reason: Removed J2 because it was unnecessary (I was optimizing it but cancelled the idea... because code length increased too much)
Below are 2 of my short versions. Both of them are faster than Merri's versions.
Version 1: Short, use Replace(), Split() and Join() functions
Code:
Function RemoveBetween1(sText As String, sL As String, sR As String) As String
Dim sPart() As String, i As Long
sPart = Split(Replace(Replace(sText, " " & sL, sR), sL, sR), sR)
For i = 1 To UBound(sPart) Step 2: sPart(i) = "": Next
RemoveBetween1 = Join(sPart, "")
End Function
Version 2: Faster, use Split(), Mid(), Instr(), Len() and RTrim() functions
Code:
Function RemoveBetween2(sText As String, sL As String, sR As String) As String
Dim sPart() As String, i As Long
sPart = Split(sText, sL)
RemoveBetween2 = RTrim(sPart(0))
For i = 1 To UBound(sPart)
RemoveBetween2 = RemoveBetween2 & RTrim(Mid$(sPart(i), InStr(sPart(i), sR) + Len(sR)))
Next
End Function
Don't forget to use [CODE]your code here[/CODE] when posting code
If your question was answered please use Thread Tools to mark your thread [RESOLVED]
anhn: what testing code you used for speed? I don't find your code faster even under IDE (which is where my code is normally much slower).
IDE
RemoveParentheses2: 25.57747 ms
RemoveBetween2: 91.07186 ms
RemoveBetween1: 184.57736 ms
Compiled with advanced optimizations on
RemoveParentheses2: 16.21600 ms
RemoveBetween2: 37.44149 ms
RemoveBetween1: 81.12066 ms
Code:
Option Explicit
Private Sub Command1_Click()
Dim strTest As String, I As Long
Const TESTCOUNT = 10000
Const TESTSTRING = "Hi this is a test (which is nonsense), just to show you an example (lol)."
Const TESTFORMAT = "0.00000 ms"
Timing = 0
For I = 1 To TESTCOUNT
strTest = RemoveParentheses(TESTSTRING)
Next I
List1.AddItem "RemoveParentheses: " & Format$(Timing * 1000, TESTFORMAT)
Debug.Print """" & strTest & """"
Debug.Print List1.List(List1.NewIndex)
Timing = 0
For I = 1 To TESTCOUNT
strTest = RemoveParentheses2(TESTSTRING)
Next I
List1.AddItem "RemoveParentheses2: " & Format$(Timing * 1000, TESTFORMAT)
Debug.Print """" & strTest & """"
Debug.Print List1.List(List1.NewIndex)
Timing = 0
For I = 1 To TESTCOUNT
strTest = RemoveBetween1(TESTSTRING, "(", ")")
Next I
List1.AddItem "RemoveBetween1: " & Format$(Timing * 1000, TESTFORMAT)
Debug.Print """" & strTest & """"
Debug.Print List1.List(List1.NewIndex)
Timing = 0
For I = 1 To TESTCOUNT
strTest = RemoveBetween2(TESTSTRING, "(", ")")
Next I
List1.AddItem "RemoveBetween2: " & Format$(Timing * 1000, TESTFORMAT)
Debug.Print """" & strTest & """"
Debug.Print List1.List(List1.NewIndex)
List1.ListIndex = List1.NewIndex
End Sub
If the test string was bigger then the difference would be much greater, because your code relies in methods that require a lot of memory (first one: creation of new strings with Replace, Split, looping & Join... second one: Split, RTrim, string concatenation). The functions I provided both only create two new strings in a single call (ByVal Text, Left$... using Mid$ on both sides does not create a new string, it is a direct memory copy).
Len(TESTSTRING) = 73
RemoveParentheses: 123.23211 ms
RemoveParentheses2: 125.63364 ms
RemoveBetween1: 416.60941 ms
RemoveBetween2: 237.37265 ms
-----------------------------
Len(TESTSTRING) = 1168
RemoveParentheses: 1765.64007 ms
RemoveParentheses2: 1654.47126 ms
RemoveBetween1: 1405.39777 ms
RemoveBetween2: 845.45206 ms
-----------------------------
Len(TESTSTRING) = 2336
RemoveParentheses: 6291.80334 ms
RemoveParentheses2: 6051.99326 ms
RemoveBetween1: 2770.82211 ms
RemoveBetween2: 1730.19534 ms
-----------------------------
Don't forget to use [CODE]your code here[/CODE] when posting code
If your question was answered please use Thread Tools to mark your thread [RESOLVED]
Public Function DeleteBetween(ByVal sText As String, sBefore As String, sAfter As String) As String
Dim P1 As Long, P2 As Long, L As Long, T As Long
Dim L1 As Long: L1 = LenB(sBefore)
Dim L2 As Long: L2 = LenB(sAfter)
If LenB(sText) = 0 Or L1 = 0 Or L2 = 0 Then Exit Function
P2 = 1 - L2
Do
P2 = P2 + L2: P1 = P2 - 1
Do: P1 = InStrB(P1 + 1, sText, sBefore)
Loop While (P1 And 1) = 0 And (P1 > 0)
If P1 = 0 Then P1 = LenB(sText) + 1
L = P1 - P2
If L Then MidB$(sText, T + 1, L) = MidB$(sText, P2, L)
T = T + L
P2 = P1 + L1 - 1
Do: P2 = InStrB(P2 + 1, sText, sAfter)
Loop While (P2 And 1) = 0 And (P2 > 0)
Loop While P2
If T Then DeleteBetween = LeftB$(sText, T)
End Function
You can't time them all within one sub. You'll get inconsistent results that typically favor later calls. I suggest a command button for each test(individually).
Yet another way:
vb Code:
Public Sub RemoveParen(sData$)
Dim X As Long, Y As Long, lRemoved As Long
X = InStr(sData, " (")
Do While X > 0
Y = InStr(X, sData, ")")
If Y > 0 Then
Mid$(sData, X) = Mid$(sData, Y + 1)
lRemoved = lRemoved + (Y - X) - 1
X = InStr(X, sData, " (")
Else
Exit Do
End If
Loop
If lRemoved > 0 Then sData = Left$(sData, Len(sData) - lRemoved)
The times become more consistent as you hit the command button multiple times. I think the inconsistency most often occurs before multiprocessing of Windows gives more processor time to the application. It would help to increase the priority the application as well, but getting rid of the background noise is pretty hard. A way to cope with the issue is to loop the same call multiple times to reduce the noise effect for a single fast call. Also, a more intelligent benchmarker would probably track minimum time and average time.
For these benchmarks we have here the differences are often big enough to notice even with the inaccuracy of timing, so "good enough".
It likely also has to do with cache. Having a bunch of other code within the same procedure is going to generate more cache misses, which otherwise wouldn't be as numerable. This just adds to the 'noise'.
The results will be significantly different if you time 'all tests in one procedure' vs 'all the tests in their own procedures, called independently of one another'. This is what I've found true for every timing test I've ever done.
This is really important with actual 'functional code'. What I mean by that is a complete project that does something, rather than isolating your procedure(s) and timing them independently of their contextual use in the 'functional program' will give marked differences. When you isolate and time them individually they'll be faster.
For instance, timing the RemoveParentheses in context of the actual use of it. Like getting the string(say, from a database), calling RemoveParentheses, and then doing something with the result(like transmitting it over a socket, or saving it back to a database) vs simply calling RemoveParentheses by itself(isolated from context: no databases, no sockets). The first example will show that RemoveParenthese is taking longer than the latter scenario. The difference will be significant.
Just for fun! One more version. (I didn't test with InstrB)
Code:
Function RemoveBetween3(ByVal sText As String, sL As String, sR As String) As String
Dim L As Long, R As Long, n As Long
L = InStr(sText, sL)
If L = 0 Then RemoveBetween3 = sText: Exit Function
n = L - 1
Do
R = InStr(L + Len(sL), sText, sR)
If R = 0 Then
Mid$(sText, n + 1, Len(sText) + 1 - L) = Mid$(sText, L)
n = n + Len(sText) + 1 - L
Else
R = R + Len(sR)
L = InStr(R, sText, sL)
If L = 0 Then
Mid$(sText, n + 1, Len(sText) + 1 - R) = Mid$(sText, R)
n = n + Len(sText) + 1 - R
Else
Mid$(sText, n + 1, L - R) = Mid$(sText, R, L - R)
n = n + L - R
End If
End If
Loop Until L = 0 Or R = 0
RemoveBetween3 = Mid$(sText, 1, n)
End Function
Code:
Len(TESTSTRING) = 73
RemoveParentheses2: 55.057 ms
RemoveBetween2: 79.392 ms
RemoveBetween3: 42.980 ms
-------------------
Len(TESTSTRING) = 146
RemoveParentheses2: 88.159 ms
RemoveBetween2: 108.978 ms
RemoveBetween3: 71.216 ms
-------------------
Len(TESTSTRING) = 292
RemoveParentheses2: 196.193 ms
RemoveBetween2: 185.686 ms
RemoveBetween3: 130.388 ms
-------------------
Len(TESTSTRING) = 584
RemoveParentheses2: 488.680 ms
RemoveBetween2: 370.618 ms
RemoveBetween3: 246.990 ms
-------------------
Len(TESTSTRING) = 1168
RemoveParentheses2: 1573.098 ms
RemoveBetween2: 760.205 ms
RemoveBetween3: 482.780 ms
-------------------
Len(TESTSTRING) = 2336
RemoveParentheses2: 5699.869 ms
RemoveBetween2: 1634.489 ms
RemoveBetween3: 958.731 ms
-------------------
Don't forget to use [CODE]your code here[/CODE] when posting code
If your question was answered please use Thread Tools to mark your thread [RESOLVED]
RemoveBetween3 is fast but the string returned is not the same as the others, it leaves blank spaces in it.
Code:
Hi this is a test, just to show you an example.
Hi this is a test , just to show you an example .
Code:
Len(TESTSTRING) = 73
RemoveParentheses: 8.517 ms
RemoveParentheses2: 7.739 ms
RemoveBetween1: 44.601 ms
RemoveBetween2: 22.175 ms
RemoveBetween3: 6.424 ms
DeleteBetween: 13.425 ms
Len(TESTSTRING) = 23360
RemoveParentheses: 33432.446 ms
RemoveParentheses2: 33321.829 ms
RemoveBetween1: 10811.815 ms
RemoveBetween2: 10319.362 ms
RemoveBetween3: 1308.263 ms
DeleteBetween: 1983.251 ms
Option Explicit
Private Declare Function ArrPtr Lib "msvbvm60" Alias "VarPtr" (Arr() As Any) As Long
Private Declare Sub PutMem4 Lib "msvbvm60" (ByVal Ptr As Long, ByVal Value As Long)
Public Function NullBetween(ByVal Text As String, Optional Before As String = "(", Optional After As String = ")") As String
Dim F As Long, I As Long, J As Long, L As Long, LA As Long, LB As Long, S As Long
Dim C() As Integer, CP As Long, H(0 To 5) As Long
Do: I = InStrB(I + 1, Text, Before)
Loop Until (I And 1) = 1 Or I = 0
If I = 0 Then
NullBetween = Text
Else
LB = LenB(Before)
J = I + LB - 1
Do: J = InStrB(J + 1, Text, After)
Loop Until (J And 1) = 1 Or J = 0
If J = 0 Then
NullBetween = Text
Else
LA = LenB(After)
H(0) = 1: H(1) = 2: H(3) = StrPtr(Text): H(4) = Len(Text)
CP = ArrPtr(C)
PutMem4 CP, VarPtr(H(0))
S = (I - 3) \ 2
Do While S >= 0
If C(S) = 32 Then S = S - 1 Else Exit Do
Loop
Do
I = J + LA - 1
F = I \ 2
Do: I = InStrB(I + 1, Text, Before)
Loop Until (I And 1) = 1 Or I = 0
If I = 0 Then Exit Do
L = (I - 3) \ 2
Do While L >= F
If C(L) = 32 Then L = L - 1 Else Exit Do
Loop
For F = F To L
S = S + 1
C(S) = C(F)
Next F
J = I + LB - 1
Do: J = InStrB(J + 1, Text, After)
Loop Until (J And 1) = 1 Or J = 0
Loop While J
For F = F To UBound(C)
S = S + 1
C(S) = C(F)
Next F
PutMem4 CP, 0
NullBetween = Left$(Text, S + 1)
End If
End If
End Function
Note: this function is intelligent enough to do the same thing RTrim$ does in RemoveBetween2. So you do not need a dual call for this function.
Update!
Improved a little bit:
Code:
Public Function NullBetween2(Text As String, Optional Before As String = "(", Optional After As String = ")") As String
Dim F As Long, I As Long, J As Long, L As Long, LA As Long, LB As Long, S As Long
Dim C() As Integer, CP As Long, H(0 To 5) As Long
NullBetween2 = Text
Do: I = InStrB(I + 1, NullBetween2, Before)
Loop Until (I And 1) = 1 Or I = 0
If I Then
LB = LenB(Before)
J = I + LB - 1
Do: J = InStrB(J + 1, NullBetween2, After)
Loop Until (J And 1) = 1 Or J = 0
If J Then
LA = LenB(After)
H(0) = 1: H(1) = 2: H(3) = StrPtr(NullBetween2): H(4) = Len(NullBetween2)
CP = ArrPtr(C)
PutMem4 CP, VarPtr(H(0))
S = (I - 3) \ 2
Do While S >= 0
If C(S) = 32 Then S = S - 1 Else Exit Do
Loop
Do
I = J + LA - 1
F = I \ 2
Do: I = InStrB(I + 1, NullBetween2, Before)
Loop Until (I And 1) = 1 Or I = 0
If I = 0 Then Exit Do
L = (I - 3) \ 2
Do While L >= F
If C(L) = 32 Then L = L - 1 Else Exit Do
Loop
For F = F To L
S = S + 1
C(S) = C(F)
Next F
J = I + LB - 1
Do: J = InStrB(J + 1, NullBetween2, After)
Loop Until (J And 1) = 1 Or J = 0
Loop While J
For F = F To UBound(C)
S = S + 1
C(S) = C(F)
Next F
PutMem4 CP, 0
NullBetween2 = Left$(NullBetween2, S + 1)
End If
End If
End Function
Last edited by Merri; Jun 7th, 2010 at 03:26 PM.
Reason: Small bugfixes to both versions (fixed space character handling)
I love to read threads like this one. It brings out the best in several of the top VB programmers that I know. 'Tis a same that Logophobic and Ellis Dee did not throw their hats into the ring for salt and pepper.
The secret to success is that the problem was well defined by OP in Post #1.
For sure, with some help of API calls, you can make it much faster.
But with native VB6/VBA functions, RemoveBetween3 is the winner.
That function can be improved more if changing all character functions to byte functions.
Don't forget to use [CODE]your code here[/CODE] when posting code
If your question was answered please use Thread Tools to mark your thread [RESOLVED]
The API calls included are from VB6 runtime, which you can't really call not being native VB6
But I don't take the RemoveBetween3 declaration if you want to rule out VarPtr/ArrPtr & PutMem4: DeleteBetween is faster in what I tested. Did I miss anything? From what I see you have to dual call RemoveBetween3 just like DeleteBetween to get the wanted results. So, byte improvement is required.
Public Function NullBetween2_noapi(Text As String, Optional Before As String = "(", Optional After As String = ")") As String
Dim F As Long, I As Long, J As Long, L As Long, LA As Long, LB As Long, S As Long
Dim C() As Byte
Do: I = InStrB(I + 1, Text, Before)
Loop Until (I And 1) = 1 Or I = 0
If I Then
LB = LenB(Before)
J = I + LB - 1
Do: J = InStrB(J + 1, Text, After)
Loop Until (J And 1) = 1 Or J = 0
If J Then
LA = LenB(After)
C = Text
S = (I - 3)
Do While S >= 0
If C(S) = 32 And C(S + 1) = 0 Then S = S - 2 Else Exit Do
Loop
S = S + 1
Do
I = J + LA - 1
F = I
Do: I = InStrB(I + 1, Text, Before)
Loop Until (I And 1) = 1 Or I = 0
If I = 0 Then Exit Do
L = (I - 3)
Do While L >= F
If C(L) = 32 And C(L + 1) = 0 Then L = L - 2 Else Exit Do
Loop
For F = F To L + 1
S = S + 1
C(S) = C(F)
Next F
J = I + LB - 1
Do: J = InStrB(J + 1, Text, After)
Loop Until (J And 1) = 1 Or J = 0
Loop While J
For F = F To UBound(C)
S = S + 1
C(S) = C(F)
Next F
If S > 0 Then ReDim Preserve C(S): NullBetween2_noapi = C
Else
NullBetween2_noapi = Text
End If
Else
NullBetween2_noapi = Text
End If
End Function
It is faster than RemoveBetween3, and in most tests faster than RemoveBetween3B:
Code:
Function RemoveBetween3B(sText As String, sL As String, sR As String) As String
Dim L As Long, R As Long, n As Long
RemoveBetween3B = sText
Do: L = InStrB(L + 1, RemoveBetween3B, sL)
Loop Until (L And 1) = 1 Or L = 0
If L = 0 Then Exit Function
n = L - 1
Do
R = L + LenB(sL) - 1
Do: R = InStrB(R + 1, RemoveBetween3B, sR)
Loop Until (R And 1) = 1 Or R = 0
If R = 0 Then
MidB$(RemoveBetween3B, n + 1, LenB(RemoveBetween3B) + 1 - L) = MidB$(RemoveBetween3B, L)
n = n + LenB(RemoveBetween3B) + 1 - L
Else
R = R + LenB(sR)
L = R - 1
Do: L = InStrB(L + 1, RemoveBetween3B, sL)
Loop Until (L And 1) = 1 Or L = 0
If L = 0 Then
MidB$(RemoveBetween3B, n + 1, LenB(RemoveBetween3B) + 1 - R) = MidB$(RemoveBetween3B, R)
n = n + LenB(RemoveBetween3B) + 1 - R
Else
MidB$(RemoveBetween3B, n + 1, L - R) = MidB$(RemoveBetween3B, R, L - R)
n = n + L - R
End If
End If
Loop Until L = 0 Or R = 0
RemoveBetween3B = LeftB$(RemoveBetween3B, n)
End Function
Update!
Second API free version, this is a bit faster in some cases, but slower in some others. It even beats NullBetween2 in one case!
Code:
Public Function NullBetween2_noapi2(Text As String, Optional Before As String = "(", Optional After As String = ")") As String
Dim F As Long, I As Long, J As Long, L As Long, LA As Long, LB As Long, S As Long
Dim C() As Byte
NullBetween2_noapi2 = Text
Do: I = InStrB(I + 1, NullBetween2_noapi2, Before)
Loop Until (I And 1) = 1 Or I = 0
If I Then
LB = LenB(Before)
J = I + LB - 1
Do: J = InStrB(J + 1, NullBetween2_noapi2, After)
Loop Until (J And 1) = 1 Or J = 0
If J Then
LA = LenB(After)
C = NullBetween2_noapi2
S = (I - 3)
Do While S >= 0
If C(S) = 32 And C(S + 1) = 0 Then S = S - 2 Else Exit Do
Loop
S = S + 1
Do
I = J + LA - 1
F = I
Do: I = InStrB(I + 1, NullBetween2_noapi2, Before)
Loop Until (I And 1) = 1 Or I = 0
If I = 0 Then Exit Do
L = (I - 3)
Do While L >= F
If C(L) = 32 And C(L + 1) = 0 Then L = L - 2 Else Exit Do
Loop
L = L + 2 - F
If L Then MidB$(NullBetween2_noapi2, S + 2, L) = MidB$(NullBetween2_noapi2, F + 1, L)
F = F + L
S = S + L
J = I + LB - 1
Do: J = InStrB(J + 1, NullBetween2_noapi2, After)
Loop Until (J And 1) = 1 Or J = 0
Loop While J
L = LenB(NullBetween2_noapi2) - F
If L Then MidB$(NullBetween2_noapi2, S + 2, L) = MidB$(NullBetween2_noapi2, F + 1, L)
S = S + L
NullBetween2_noapi2 = LeftB$(NullBetween2_noapi2, S + 1)
End If
End If
End Function
MidB$ seems to be good for copying long blocks of data, so I tested around and...
Code:
Public Function NullBetween3(Text As String, Optional Before As String = "(", Optional After As String = ")") As String
Dim F As Long, I As Long, J As Long, L As Long, LA As Long, LB As Long, S As Long
Dim C() As Integer, CP As Long, H(0 To 5) As Long
NullBetween3 = Text
Do: I = InStrB(I + 1, NullBetween3, Before)
Loop Until (I And 1) = 1 Or I = 0
If I Then
LB = LenB(Before)
J = I + LB - 1
Do: J = InStrB(J + 1, NullBetween3, After)
Loop Until (J And 1) = 1 Or J = 0
If J Then
LA = LenB(After)
H(0) = 1: H(1) = 2: H(3) = StrPtr(NullBetween3): H(4) = Len(NullBetween3)
CP = ArrPtr(C)
PutMem4 CP, VarPtr(H(0))
S = (I - 3) \ 2
Do While S >= 0
If C(S) = 32 Then S = S - 1 Else Exit Do
Loop
Do
I = J + LA - 1
F = I \ 2
Do: I = InStrB(I + 1, NullBetween3, Before)
Loop Until (I And 1) = 1 Or I = 0
If I = 0 Then Exit Do
L = (I - 3) \ 2
Do While L >= F
If C(L) = 32 Then L = L - 1 Else Exit Do
Loop
If (L - F) > 95 Then
L = L + 1 - F
Mid$(NullBetween3, S + 2, L) = Mid$(NullBetween3, F + 1, L)
F = F + L
S = S + L
Else
For F = F To L
S = S + 1
C(S) = C(F)
Next F
End If
J = I + LB - 1
Do: J = InStrB(J + 1, NullBetween3, After)
Loop Until (J And 1) = 1 Or J = 0
Loop While J
If (UBound(C) - F) > 95 Then
L = UBound(C) + 1 - F
Mid$(NullBetween3, S + 2, L) = Mid$(NullBetween3, F + 1, L)
S = S + L
Else
For F = F To UBound(C)
S = S + 1
C(S) = C(F)
Next F
End If
PutMem4 CP, 0
NullBetween3 = Left$(NullBetween3, S + 1)
End If
End If
End Function
Yay for being even faster with large amounts of data. This function could be improved by simply finding out where is the crucial point where Mid$ becomes faster than moving Integers.
Edit!
NullBetween3 updated, biggest change is replacing 999 with 95, thus 96 characters is a midpoint where Mid$ begins to become faster than Integer array.
Third post in a row, but this is interesting enough: I found a file called enwik8, which is a 100 000 000 byte file of Wikipedia text. So, I'd guess this would be a good file to test with as it contains a lot of natural language?
Update!
This actually brings up a new question: how should internal parentheses be handled? If we have a string (((text here))) should it all be wiped out? The current implementations end up with )).
Would you eat your pants if I told I got another 30% off?
Code:
Option Explicit
Private Declare Function ArrPtr Lib "msvbvm60" Alias "VarPtr" (Arr() As Any) As Long
Private Declare Sub PutMem4 Lib "msvbvm60" (ByVal Ptr As Long, ByVal Value As Long)
Private Declare Sub PutMem8 Lib "msvbvm60" (ByVal Ptr As Long, ByVal Value As Currency)
Public Function NullBetween4(Text As String, Optional Before As String = "(", Optional After As String = ")") As String
Dim F As Long, I As Long, J As Long, L As Long, LA As Long, LB As Long, S As Long
Dim B() As Byte, BL As Long, BP As Long, C() As Integer, CP As Long, H(0 To 5) As Long
NullBetween4 = Text
Do: I = InStrB(I + 1, NullBetween4, Before)
Loop Until (I And 1) = 1 Or I = 0
If I Then
LB = LenB(Before)
J = I + LB - 1
Do: J = InStrB(J + 1, NullBetween4, After)
Loop Until (J And 1) = 1 Or J = 0
If J Then
LA = LenB(After)
H(0) = 1: H(1) = 2: H(3) = StrPtr(NullBetween4): H(4) = Len(NullBetween4)
CP = ArrPtr(C)
PutMem4 CP, VarPtr(H(0))
S = (I - 3) \ 2
Do While S >= 0
If C(S) = 32 Then S = S - 1 Else Exit Do
Loop
Do
I = J + LA - 1
F = I \ 2
Do: I = InStrB(I + 1, NullBetween4, Before)
Loop Until (I And 1) = 1 Or I = 0
If I = 0 Then Exit Do
L = (I - 3) \ 2
Do While L >= F
If C(L) = 32 Then L = L - 1 Else Exit Do
Loop
If (L - F) > 95 Then
L = L + 1 - F
Mid$(NullBetween4, S + 2, L) = Mid$(NullBetween4, F + 1, L)
F = F + L
S = S + L
Else
For F = F To L
S = S + 1
C(S) = C(F)
Next F
End If
J = I + LB - 1
Do: J = InStrB(J + 1, NullBetween4, After)
Loop Until (J And 1) = 1 Or J = 0
Loop While J
If (UBound(C) - F) > 95 Then
L = UBound(C) + 1 - F
Mid$(NullBetween4, S + 2, L) = Mid$(NullBetween4, F + 1, L)
S = S + L
Else
For F = F To UBound(C)
S = S + 1
C(S) = C(F)
Next F
End If
If S >= 0 Then
L = LenB(NullBetween4)
If L >= 4096 Then
S = S + 1
C(S) = 0
S = S * 2
BP = H(3) + S + 2
BL = L - S
B = vbNullString
PutMem8 (Not Not B) + 12, CCur(BP / 10000@) + (BL * 429496.7296@)
PutMem4 H(3) - 4, S
Debug.Assert App.hInstance
Else
NullBetween4 = Left$(NullBetween4, S + 1)
End If
Else
NullBetween4 = vbNullString
End If
PutMem4 CP, 0
End If
End If
End Function
Instead of improving on the algorithm itself I paid attention to the fact Left$ always creates a new string. What I changed was to reduce the length of output string without creating a new string. I then created a new itemless byte array where I placed the string overflow bytes so that these get cleared from the memory when the byte array is erased. By testing I found out that starting at around 4 kB of data this method becomes faster than using Left$.
As it may be unclear what the related code exactly does, here is a commented version:
vb Code:
' make S = the final count of characters
S = S + 1
' make the character after the end of the string a NULL (as required of a valid BSTR)
C(S) = 0
' count of characters to length in bytes
S = S * 2
' H(3) = StrPtr(NullBetween4) -> calculate the pointer to data we free from memory
BP = H(3) + S + 2
' calculate the length of bytes to free from memory
BL = L - S
' create initialized but dataless byte array
B = vbNullString
' get the pointer to safe array header and mark new data pointer and length in one API call
PutMem8 (Not Not B) + 12, CCur(BP / 10000@) + (BL * 429496.7296@)
' change the length of NullBetween4 by changing the BSTR length information