A very simple MSXML sample (XHTML parser)
This is just a simple core sample of how to open a XHTML file using MSXML. The code is wrapped to class module and the HTML file structure is pushed to a listbox.
I made this code only due to interest of trying out MSXML.DLL; though I probably won't code this thing any further, it seems MSXML is very slow for some reason (and it didn't load the full sample file I used using .load function, only the first line).
Hopefully this will be of some help for a person who wants to parse XML and/or XHTML and doesn't know how to get started with it.
Instructions:- Start a new project
- Add a reference to Microsoft XML using Project > References
- Add a new class module, rename it to clsXHTML and add the code below
- Add a listbox to Form1 and paste the second code below
It should work from there. You also need to have some XHTML file on your harddisk to test.
Code:
' clsXHTML
Option Explicit
Private Const DOC_STRICT = "<!doctype html public ""-//w3c//dtd xhtml 1.0 strict//en"" ""http://www.w3.org/tr/xhtml1/dtd/xhtml1-strict.dtd"">"
Private Const DOC_TRANS = "<!doctype html public ""-//w3c//dtd xhtml 1.0 transitional//en"" ""http://www.w3.org/tr/xhtml1/dtd/xhtml1-transitional.dtd"">"
Private Const DOC_FRAME = "<!doctype html public ""-//w3c//dtd xhtml 1.0 frameset//en"" ""http://www.w3.org/tr/xhtml1/dtd/xhtml1-frameset.dtd"">"
Private Const DOC_11 = "<!doctype html public ""-//w3c//dtd xhtml 1.1//en"" ""http://www.w3.org/tr/xhtml11/dtd/xhtml11.dtd"">"
Public Enum XHTML_DOCTYPE
XHTMLunvalid
XHTML10Strict
XHTML10Transitional
XHTML10Frameset
XHTML11
End Enum
Dim MainChild() As IXMLDOMNode
Dim MainChildCount As Integer
Dim XHTML As DOMDocument
Public Function Doctype() As XHTML_DOCTYPE
Dim intA As Integer, strDoctype As String
For intA = 0 To MainChildCount - 1
If Left$(LCase$(MainChild(intA).xml), 9) = "<!doctype" Then
strDoctype = MainChild(intA).xml
Exit For
End If
Next intA
If intA = MainChildCount Then Exit Function
Select Case LCase$(strDoctype)
Case DOC_STRICT
Doctype = XHTML10Strict
Case DOC_TRANS
Doctype = XHTML10Transitional
Case DOC_FRAME
Doctype = XHTML10Frameset
Case DOC_11
Doctype = XHTML11
End Select
End Function
Public Sub FileClose()
Erase MainChild
MainChildCount = 0
Set XHTML = Nothing
End Sub
Public Function FileOpen(ByRef Filename As String) As Boolean
Dim bytFile As Byte, strXML As String
If LenB(Dir$(Filename)) = 0 Then Exit Function
If LenB(Dir$) > 0 Then Exit Function
bytFile = FreeFile
Open Filename For Input As #bytFile
strXML = input(FileLen(Filename), #bytFile)
Close #bytFile
Set XHTML = New DOMDocument
FileOpen = XHTML.loadXML(strXML)
If Not FileOpen Then FileClose
GetMainChilds
End Function
Private Sub GetMainChilds()
Dim Child As IXMLDOMNode, intCount As Integer, intA As Integer
If XHTML Is Nothing Then Exit Sub
If XHTML.firstChild Is Nothing Then
Erase MainChild
MainChildCount = 0
Exit Sub
End If
If XHTML.firstChild Is XHTML.lastChild Then
ReDim MainChild(0)
Set MainChild(0) = XHTML.firstChild
MainChildCount = 1
Else
Set Child = XHTML.firstChild.nextSibling
Do Until Child Is Nothing
intCount = intCount + 1
Set Child = Child.nextSibling
Loop
ReDim MainChild(intCount)
Set MainChild(0) = XHTML.firstChild
For intA = 1 To intCount
Set MainChild(intA) = MainChild(intA - 1).nextSibling
Next intA
MainChildCount = intCount + 1
End If
End Sub
Public Sub ListElements(ByRef List As ListBox)
Dim intA As Integer, HTML As IXMLDOMNode
For intA = 0 To MainChildCount - 1
If MainChild(intA).baseName = "html" And Left$(LCase$(MainChild(intA).xml), 9) <> "<!doctype" Then Exit For
Next intA
If intA = MainChildCount Then Exit Sub
Set HTML = MainChild(intA)
OutputChildNodes HTML, List
Set HTML = Nothing
End Sub
Public Sub OutputChildNodes(ByRef ParentNode As IXMLDOMNode, ByRef List As ListBox, Optional ByVal Depth As Integer)
Dim Child As IXMLDOMNode
If LenB(ParentNode.baseName) > 0 Then
If Depth > 0 Then
List.AddItem Str$(Depth) & String$(Depth, vbTab) & ParentNode.baseName
Else
List.AddItem ParentNode.baseName
End If
ElseIf LenB(ParentNode.Text) > 0 Then
If Depth > 0 Then
List.AddItem Str$(Depth) & String$(Depth, vbTab) & ParentNode.Text
Else
List.AddItem ParentNode.Text
End If
Else
If Depth > 0 Then
List.AddItem Str$(Depth) & String$(Depth, vbTab) & "(I have no idea what this is!)"
Else
List.AddItem "(I have no idea what this is!)"
End If
End If
If ParentNode.hasChildNodes Then
Set Child = ParentNode.firstChild
Do Until Child Is Nothing
OutputChildNodes Child, List, Depth + 1
Set Child = Child.nextSibling
Loop
End If
End Sub
Private Sub Class_Terminate()
If Not XHTML Is Nothing Then FileClose
End Sub
VB Code:
' Form1
Option Explicit
Private Sub Form_Load()
Dim XHTML As clsXHTML
Set XHTML = New clsXHTML
XHTML.FileOpen "C:\test.html"
XHTML.ListElements List1
XHTML.FileClose
Set XHTML = Nothing
End Sub
Private Sub Form_Resize()
If Me.WindowState = vbMinimized Then Exit Sub
List1.Move 0, 0, Me.ScaleWidth, Me.ScaleHeight
End Sub