Problem in reading in case of line breaks
Hi,
I am reading from a CSV file. My code is something like this:
Code:
Set f = fs.OpenTextFile(sDesktopPath & "\" & "Download.txt")
' The first line is read and discarded (presumably a header row - confirm against the file).
f.ReadLine
' Known limitation: ReadLine returns one physical line, so a record whose field
' contains a line break arrives as a partial record with fewer fields, and the
' fixed-index accesses below then fail with "subscript out of range".
Do While Not f.AtEndOfStream
    sLine = Replace(f.ReadLine, Chr(39), Chr(39) & Chr(39)) 'Replace all ' symbol with '' symbol
    sLine = Replace(sLine, Chr(34), Chr(39)) 'Replace " with '
    ' Decode the HTML entity back to a plain ampersand. The search string must be
    ' the entity itself; replacing "&" with "&" would be a no-op.
    sLine = Replace(sLine, "&amp;", "&") 'Remove "amp;"
    ' Fields are separated by quote-Tab-quote; the split consumes those quotes.
    sBuffer = Split(sLine, Chr(39) & vbTab & Chr(39))
    ' Re-wrap every interior field in single quotes (trimmed).
    For i = 1 To UBound(sBuffer) - 1
        sBuffer(i) = "'" & Trim(sBuffer(i)) & "'"
    Next
    ' The first field lost only its closing quote and the last field only its
    ' opening quote, so restore just those.
    sBuffer(0) = sBuffer(0) & Chr(39)
    sBuffer(UBound(sBuffer)) = Chr(39) & sBuffer(UBound(sBuffer))
    'For integers - remove '
    sBuffer(8) = Replace(sBuffer(8), Chr(39), "")
    sBuffer(9) = Replace(sBuffer(9), Chr(39), "")
    sBuffer(10) = Replace(sBuffer(10), Chr(39), "")
    sBuffer(17) = Replace(sBuffer(17), Chr(39), "")
    sBuffer(20) = Replace(sBuffer(20), Chr(39), "")
    '''''''''''some other code constructs
Loop
It works fine in general. However, in some records the 11th column contains line breaks, so the code cannot read the remaining columns (from the 12th column onwards), and as a result the statement
sBuffer(17) = Replace(sBuffer(17), Chr(39), "")
is resulting in error.
Any suggestions for handling records that have line breaks inside a column? When there are no line breaks, it works perfectly.
Regards,
Ashish
Re: Problem in reading in case of line breaks
As you are reading one line at a time, how are you getting line breaks in your string? It looks like it must be some other character; you need to find out exactly what unwanted character(s) are in your string.
Re: Problem in reading in case of line breaks
I don't think he's getting line breaks mid string, I think he's getting half a record because of a line break.
As this looks like a specific CSV, won't the UBound of sBuffer give you a good idea of whether column 11 is a multiline field or not?
Re: Problem in reading in case of line breaks
but even so why would
Quote:
sBuffer(17) = Replace(sBuffer(17), Chr(39), "")
is resulting in error.
give an error in any valid string
Re: Problem in reading in case of line breaks
I don't follow (lack of caffeine, perhaps). He'll be getting 'subscript out of range', surely?
If UBound(sBuffer)<20 then read the next line and combine
Re: Problem in reading in case of line breaks
First of all, here we have a split function that can ignore line changes that are within a pair of quotes:
Code:
Public Sub QuickSplit(Expression As String, ResultSplit() As String, Optional Delimiter As String = " ", Optional ByVal Limit As Long = -1, Optional ByVal Compare As VbCompareMethod = vbBinaryCompare, Optional ByRef IgnoreDelimiterWithin As String = vbNullString)
    ' Splits Expression on Delimiter and stores the pieces in ResultSplit (0-based).
    '
    ' Expression            - string to split; it is not modified.
    ' ResultSplit           - dynamic String array that receives the pieces.
    ' Delimiter             - separator string (default: a single space).
    ' Limit                 - -1 (default) uses every delimiter. A positive value
    '                         stops after Limit delimiters, so up to Limit + 1
    '                         elements are returned and the last one holds the
    '                         unsplit remainder. Any other value returns
    '                         Expression as a single element.
    ' Compare               - comparison method handed to InStrB. The author notes
    '                         the routine does not work with vbTextCompare.
    ' IgnoreDelimiterWithin - optional quote marker; delimiters found between a
    '                         pair of these markers are not treated as separators.
    '                         If a marker is never closed, the delimiters after it
    '                         are treated as ordinary separators.
    '
    ' An empty Expression or an empty Delimiter also yields a single element that
    ' contains Expression.
    '
    ' All positions are byte offsets (LenB / InStrB / MidB$). After every InStrB
    ' search a "Do Until (pos And 1) Or (pos = 0)" loop skips matches at even byte
    ' offsets, i.e. matches that do not start on a character boundary.
    Dim lngA As Long, lngB As Long, lngCount As Long, lngDelLen As Long, lngExpLen As Long, lngIgnLen As Long, lngResults() As Long
    lngExpLen = LenB(Expression)
    lngDelLen = LenB(Delimiter)
    If lngExpLen > 0 And lngDelLen > 0 And (Limit > 0 Or Limit = -1&) Then
        lngIgnLen = LenB(IgnoreDelimiterWithin)
        If lngIgnLen Then
            ' lngA = next delimiter position, lngB = next quote-marker position
            lngA = InStrB(1, Expression, Delimiter, Compare)
            Do Until (lngA And 1) Or (lngA = 0)
                lngA = InStrB(lngA + 1, Expression, Delimiter, Compare)
            Loop
            lngB = InStrB(1, Expression, IgnoreDelimiterWithin, Compare)
            Do Until (lngB And 1) Or (lngB = 0)
                lngB = InStrB(lngB + 1, Expression, IgnoreDelimiterWithin, Compare)
            Loop
            If Limit = -1& Then
                ' Upper bound: there cannot be more delimiters than fit in Expression
                ReDim lngResults(0 To (lngExpLen \ lngDelLen))
                Do While lngA > 0
                    ' The delimiter occupies bytes lngA .. lngA + lngDelLen - 1, so it
                    ' lies wholly before the marker when lngA + lngDelLen <= lngB.
                    ' ("<" wrongly skipped a delimiter directly followed by a marker.)
                    If lngA + lngDelLen <= lngB Or lngB = 0 Then
                        lngResults(lngCount) = lngA
                        lngA = InStrB(lngA + lngDelLen, Expression, Delimiter, Compare)
                        Do Until (lngA And 1) Or (lngA = 0)
                            lngA = InStrB(lngA + 1, Expression, Delimiter, Compare)
                        Loop
                        lngCount = lngCount + 1
                    Else
                        ' Delimiter sits after an opening marker: find the closing marker
                        lngB = InStrB(lngB + lngIgnLen, Expression, IgnoreDelimiterWithin, Compare)
                        Do Until (lngB And 1) Or (lngB = 0)
                            lngB = InStrB(lngB + 1, Expression, IgnoreDelimiterWithin, Compare)
                        Loop
                        If lngB Then
                            ' Resume the delimiter search after the closing marker
                            lngA = InStrB(lngB + lngIgnLen, Expression, Delimiter, Compare)
                            Do Until (lngA And 1) Or (lngA = 0)
                                lngA = InStrB(lngA + 1, Expression, Delimiter, Compare)
                            Loop
                            If lngA Then
                                ' Locate the next opening marker
                                lngB = InStrB(lngB + lngIgnLen, Expression, IgnoreDelimiterWithin, Compare)
                                Do Until (lngB And 1) Or (lngB = 0)
                                    lngB = InStrB(lngB + 1, Expression, IgnoreDelimiterWithin, Compare)
                                Loop
                            End If
                        End If
                    End If
                Loop
            Else
                ReDim lngResults(0 To Limit - 1)
                Do While lngA > 0
                    ' Same boundary test as in the unlimited branch above
                    If lngA + lngDelLen <= lngB Or lngB = 0 Then
                        lngResults(lngCount) = lngA
                        lngA = InStrB(lngA + lngDelLen, Expression, Delimiter, Compare)
                        Do Until (lngA And 1) Or (lngA = 0)
                            lngA = InStrB(lngA + 1, Expression, Delimiter, Compare)
                        Loop
                        lngCount = lngCount + 1
                        If lngCount = Limit Then Exit Do
                    Else
                        lngB = InStrB(lngB + lngIgnLen, Expression, IgnoreDelimiterWithin, Compare)
                        Do Until (lngB And 1) Or (lngB = 0)
                            lngB = InStrB(lngB + 1, Expression, IgnoreDelimiterWithin, Compare)
                        Loop
                        If lngB Then
                            lngA = InStrB(lngB + lngIgnLen, Expression, Delimiter, Compare)
                            Do Until (lngA And 1) Or (lngA = 0)
                                lngA = InStrB(lngA + 1, Expression, Delimiter, Compare)
                            Loop
                            If lngA Then
                                lngB = InStrB(lngB + lngIgnLen, Expression, IgnoreDelimiterWithin, Compare)
                                Do Until (lngB And 1) Or (lngB = 0)
                                    lngB = InStrB(lngB + 1, Expression, IgnoreDelimiterWithin, Compare)
                                Loop
                            End If
                        End If
                    End If
                Loop
            End If
        Else
            ' No quote marker: record every (character-aligned) delimiter position
            lngA = InStrB(1, Expression, Delimiter, Compare)
            Do Until (lngA And 1) Or (lngA = 0)
                lngA = InStrB(lngA + 1, Expression, Delimiter, Compare)
            Loop
            If Limit = -1& Then
                ReDim lngResults(0 To (lngExpLen \ lngDelLen))
                Do While lngA > 0
                    lngResults(lngCount) = lngA
                    lngA = InStrB(lngA + lngDelLen, Expression, Delimiter, Compare)
                    Do Until (lngA And 1) Or (lngA = 0)
                        lngA = InStrB(lngA + 1, Expression, Delimiter, Compare)
                    Loop
                    lngCount = lngCount + 1
                Loop
            Else
                ReDim lngResults(0 To Limit - 1)
                Do While lngA > 0 And lngCount < Limit
                    lngResults(lngCount) = lngA
                    lngA = InStrB(lngA + lngDelLen, Expression, Delimiter, Compare)
                    Do Until (lngA And 1) Or (lngA = 0)
                        lngA = InStrB(lngA + 1, Expression, Delimiter, Compare)
                    Loop
                    lngCount = lngCount + 1
                Loop
            End If
        End If
        ' lngCount delimiters produce lngCount + 1 pieces
        ReDim Preserve ResultSplit(0 To lngCount)
        If lngCount = 0 Then
            ResultSplit(0) = Expression
        Else
            ' First piece: everything before the first delimiter
            ResultSplit(0) = LeftB$(Expression, lngResults(0) - 1)
            ' Middle pieces. The loop bound is evaluated once, before lngCount is
            ' reused as the loop counter; on exit lngCount indexes the last delimiter.
            For lngCount = 0 To lngCount - 2
                ResultSplit(lngCount + 1) = MidB$(Expression, lngResults(lngCount) + lngDelLen, lngResults(lngCount + 1) - lngResults(lngCount) - lngDelLen)
            Next lngCount
            ' Last piece: everything after the last recorded delimiter
            ResultSplit(lngCount + 1) = RightB$(Expression, lngExpLen - lngResults(lngCount) - lngDelLen + 1)
        End If
    Else
        ReDim ResultSplit(0 To 0)
        ResultSplit(0) = Expression
    End If
End Sub
I know, the code is long, but it is also very fast :)
Now as for reading the CSV with this code:
Code:
Const SQUOTE = "'"
Dim sLines() As String, sBuffer() As String
Dim hFile As Integer, lngRow As Long
Dim sContents As String

' Pull the entire file into memory with a single binary read
hFile = FreeFile
Open sDesktopPath & "\" & "Download.txt" For Binary Access Read As #hFile
sContents = Input(LOF(hFile), #hFile)
Close #hFile

' Cut the contents into rows; a line break inside a quoted cell does not end the row.
' NOTE(review): SQUOTE is used as the cell quote marker - confirm it matches the
' quote character actually present in the file.
QuickSplit sContents, sLines, vbNewLine, , , SQUOTE

' Walk the rows one by one
For lngRow = 0 To UBound(sLines)
    ' Cut the row into cells on Tab, skipping tabs that sit inside a quoted cell
    QuickSplit sLines(lngRow), sBuffer, vbTab, , , SQUOTE
    ' now do whatever you want to do with sBuffer data
Next lngRow
Hopefully no mistake/typo there :D
Edit
And there we go fixing the thought mistake.
Re: Problem in reading in case of line breaks
Two questions for you Merri...
Firstly is it a Sub or a Function? :rolleyes:
Secondly without testing it, it looks like it can deal with both 1 and 2 byte chars is that right? It would be nice to have a SplitB function, looks like this might be it.
Re: Problem in reading in case of line breaks
Heh, thanks about that, fixed.
You have to uncomment all the Do Until (lngX And 1) Or (lngX = 0) loops and yes, after that it should handle byte positions too. As for speed, it is about three times faster than the native Split on average (it is faster than any of the Split solutions over at VBspeed).
Also: the sub doesn't work with TextCompare, but I'm not going to bother with that :)