-
regex pattern help
I am trying to pull data from html on a page but it isn't finding the values.
Can anyone see what the issue might be?
Sample source page:
http://www.lleemon.com/regex-test.html
Sample vb function:
Code:
Private Function fnGetSourceValues(ByVal sHTML As String) As String
Dim options As RegexOptions = RegexOptions.IgnoreCase Or RegexOptions.Multiline
Dim sValue As String = ""
Dim sRE As String = ""
'sample pattern
sRE = "<table width=""635"" border=""0"" cellspacing=""0"" cellpadding=""0""> <tr> <td background=""Images/backgroup_banner.gif""><img src=""images/banner_productitemdetail.gif"" width=""617"" height=""62""></td> </tr> <tr> <td bgcolor=""DDE6CD""> <table width=""635"" border=""0"" cellspacing=""4"" cellpadding=""0""> <tr> <td bgcolor=""DDE6CD""> <table width=""633"" border=""0"" cellspacing=""0"" cellpadding=""0""> <tr> <td height=""24""> <table width=""100%"" border=""0"" class=""font6"" cellpadding=""0"" cellspacing=""0"" > <form name=""CFForm_1"" action=""shp_c/index.cfm"" method=POST onSubmit=""return _CF_checkCFForm_1(this)"" enctype=""multipart/form-data""> <input type=""hidden"" name=""FROMITEMDETAIL"" value=""Y""> <tr> <td width=""0"" valign=""top"">ItemNo:</td> <td width=""260"" valign=""top""><font color=""9C3A3A"">(.*)</font> </td> <td width=""54"" valign=""top"">Desc:</td> <td width=""303"" valign=""top""><font color=""9C3A3A"">(.*)</font></td> <input type=""hidden"" name=""ItemNumber"" value=""#WP1030-ASST""> <input type=""hidden"" name=""BasePrice"" value=""0""> <input type=""hidden"" name=""Banner"" value=""2""> <td width=""0"" valign=""top"" align=""right""> <input type=""text"" name=""Qty"" size=""4""> </td> <td width=""0"" valign=""top"" align=""right""> EA </td> </tr> <tr> <td width=""0"" valign=""top""> </td> <td colspan=""3"" valign=""top"" align=""right""> </td> <td colspan=""2"" width=""0"" valign=""top""> <input type=""image"" src=""images/addtocart.gif"" border=""0"" img alt=Submit value=""add to cart"" name=""Buy""> </td> </tr> </form> </table> </td> </tr> <tr> <td> <table width=""633"" border=""0"" cellspacing=""0"" cellpadding=""0""> <tr> <td width=""250"" valign=""top""> <table width=""250"" border=""0"" cellspacing=""0"" cellpadding=""0""> <tr> <td height=""180"" width=""250"" valign=""top""> <table width=""250"" border=""0"" cellspacing=""0"" cellpadding=""0"" align=""left"" height=""250""> <tr valign=""top""> <td height=""180""> <img border=0 src=""(.*)"" width=""240"" height=""180""> </td> </tr> </table> </td> </tr> <tr> <td height=""0"" width=""250"" valign=""top"" class=""font6""> </td> </tr> </table> </td> <td width=""385"" valign=""top""> <table width=""100%"" border=""0"" cellspacing=""0"" cellpadding=""0"" height=""250"" class=""font2"" bordercolor=""DDE6CD""> <tr> <td width=""0"" height=""20"" bgcolor=""F2F1E7"" class=""font2"">Uom:</td> <td width=""150"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" class=""font2"">CatalogVol:</td> <td width=""78"" class=""font2"">(.*)</td> </tr> <tr> <td width=""0"" height=""20"" bgcolor=""F2F1E7"" class=""font2"">MinQty:</td> <td width=""120"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" class=""font2"">CatPage:</td> <td width=""78"" class=""font2"">(.*)</td> </tr> <tr> <td width=""0"" height=""20"" bgcolor=""F2F1E7"" class=""font2"">BoxQty:</td> <td width=""120"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" class=""font2"">P-CatVol:</td> <td width=""78"" class=""font2"">(.*)</td> </tr> <tr> <td width=""0"" height=""20"" bgcolor=""F2F1E7"" class=""font2"">CaseQty:</td> <td width=""120"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" class=""font2"">P-CatPage:</td> <td width=""78"" class=""font2"">(.*)</td> </tr> <tr> <td width=""0"" height=""20"" bgcolor=""F2F1E7"" class=""font2"">BasePrice:</td> <td width=""120"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" class=""font2"">PolyBag:</td> <td width=""78"" class=""font2"">(.*)</td> </tr> <tr> <td width=""0"" height=""20"" bgcolor=""F2F1E7"" class=""font2""> Avail. Qty: </td> <td width=""120"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" class=""font2"">CaseCube:</td> <td width=""78"" class=""font2"">(.*)</td> </tr> <tr> <td width=""0"" bgcolor=""F2F1E7"" height=""20"" class=""font2"">ProdLength:</td> <td width=""166"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" height=""21"" class=""font2"">SugRetail:</td> <td width=""78"" height=""21"" class=""font2"">(.*)</td> </tr> <tr> <td width=""0"" bgcolor=""F2F1E7"" height=""20"" class=""font2"">ProdWeight:</td> <td width=""123"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" class=""font2"">Class:</td> <td width=""78"" class=""font2"">(.*)</td> </tr> <tr> <td width=""0"" height=""20"" bgcolor=""F2F1E7"" class=""font2"">UPC:</td> <td width=""0"" class=""font2"">(.*)</td> <td width=""0"" bgcolor=""F2F1E7"" height=""21"" class=""font2"">Category:</td> <td width=""78"" height=""21"" class=""font2"">(.*)</td> </tr> </table> </td> </tr> </table> </td> </tr> </table> </td> </tr> </table> </td> </tr> </table>"
Dim rx As Regex = New Regex(sRE, options)
Dim mc As MatchCollection = rx.Matches(sHTML)
Dim mMatch As Match = Nothing
If mc.Count > 1 Then
For iLoop = 0 To mc.Count - 1
Dim sUom As String = ""
Dim sMinQty As String = ""
Dim sBoxQty As String = ""
sUom = mc(iLoop).Groups(1).Value
sMinQty = mc(iLoop).Groups(2).Value
sBoxQty = mc(iLoop).Groups(3).Value
sValue = sUom & "|" & sMinQty
Next
End If
Return sValue
End Function
-
Re: regex pattern help
Use this to help build your expression:
http://regexpal.com/
Additionally instead of explicitly entering the spaces try using the whitespace character-thingy \s with a star(or lazy star) repeater. Something that looks like this:
When I took a look at your page source I saw a lot of extra white space between tags, that's why I suggest this. It may not solve your problem.
This will help witht he special characters and syntax: http://www.regular-expressions.info/reference.html