Results 1 to 3 of 3

Thread: [RESOLVED] Webpage Login+scraping with httpWebRequest

  1. #1

    Thread Starter
    Junior Member
    Join Date
    May 2007
    Posts
    31

    Resolved [RESOLVED] Webpage Login+scraping with httpWebRequest

    Dear sir, I want to download a web page using HttpWebRequest.

    Web page Address >> http://www.stockbangladesh.com/resou...l=18455&inv=60

    from a free website

    http://www.stockbangladesh.com/

    But that page requires login first.
    my user name : babul37 and pass : mhbb7337

    How can I download that page by httpWebRequest?

    I have found several examples on this topic, but none of them is complete. After a week of trying, I still have not succeeded.

    Please help me.

  2. #2
    Fanatic Member
    Join Date
    Nov 2000
    Location
    Minnesota
    Posts
    830

    Re: Webpage Login+scraping with httpWebRequest

    Add the following class

    Code:
    Imports Microsoft.VisualBasic
    Imports System.IO
    Imports System.Net
    Imports System.Text
    Imports System.Text.RegularExpressions
    Imports System
    
    
    ''' <summary>
    ''' Small HTTP helper built on <see cref="HttpWebRequest"/> that issues GET/POST
    ''' requests against <see cref="URL"/>, remembers the cookies returned by the server
    ''' and reuses them (together with the last response URI as Referer) on later requests.
    ''' </summary>
    Public Class WebPost
        Private Const MAX_BUF As Integer = 1024

        ' Accept header sent with every request.
        Private Const ACCEPT_HEADER As String = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-powerpoint, application/vnd.ms-excel, application/msword, application/x-shockwave-flash, */*"

        ''' <summary>Creates a helper with an empty URL, empty Referer and no stored cookies.</summary>
        Public Sub New()
            MyBase.New()

            m_sURL = String.Empty
            m_sReferer = String.Empty
            m_sLastError = String.Empty
            m_sUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322)"
            m_ccCookies = New CookieCollection()
        End Sub

    #Region " Instance variables "
        Private m_sUserAgent As String
        Private m_ccCookies As CookieCollection
        Private m_sReferer As String
        Private m_sURL As String
        Private m_sLastError As String
    #End Region

        ''' <summary>Cookies collected from previous responses; they are sent with every new request.</summary>
        Public ReadOnly Property Cookies() As CookieCollection
            Get
                Return m_ccCookies
            End Get
        End Property

        ''' <summary>Absolute URL that the next request is sent to.</summary>
        Public Property URL() As String
            Get
                Return m_sURL
            End Get
            Set(ByVal Value As String)
                m_sURL = Value
            End Set
        End Property

        ''' <summary>
        ''' Referer header for the next request. It is overwritten with the response URI
        ''' after every successful request.
        ''' </summary>
        Public Property Referer() As String
            Get
                Return m_sReferer
            End Get
            Set(ByVal Value As String)
                m_sReferer = Value
            End Set
        End Property

        ''' <summary>
        ''' Message of the exception caught by the most recent request or file write,
        ''' or an empty string when that operation did not fail.
        ''' </summary>
        Public ReadOnly Property LastError() As String
            Get
                Return m_sLastError
            End Get
        End Property

        ''' <summary>
        ''' Sends a request to <see cref="URL"/> and returns the response body as text.
        ''' </summary>
        ''' <param name="sPOSTData">
        ''' Form-encoded body. When it is Nothing or empty a GET is issued, otherwise the
        ''' data is sent as an application/x-www-form-urlencoded POST.
        ''' </param>
        ''' <param name="bAutoRedirect">Value assigned to HttpWebRequest.AllowAutoRedirect.</param>
        ''' <returns>
        ''' The response body. If an exception occurs, the exception text (ex.ToString)
        ''' followed by a line break is appended to whatever was read so far; no exception
        ''' is propagated to the caller.
        ''' </returns>
        Public Function Request(ByVal sPOSTData As String, Optional ByVal bAutoRedirect As Boolean = False) As String
            Dim sReturn As String = String.Empty

            m_sLastError = String.Empty
            Try
                Dim hwrRequest As HttpWebRequest = CreateRequest(bAutoRedirect)

                If Not String.IsNullOrEmpty(sPOSTData) Then
                    Dim baBuf As Byte() = Encoding.ASCII.GetBytes(sPOSTData)

                    hwrRequest.Method = "POST"
                    hwrRequest.ContentType = "application/x-www-form-urlencoded"
                    hwrRequest.ContentLength = baBuf.Length

                    ' Using guarantees the request stream is closed even if Write throws.
                    Using stWS As Stream = hwrRequest.GetRequestStream()
                        stWS.Write(baBuf, 0, baBuf.Length)
                    End Using
                End If

                Using hwrResponse As HttpWebResponse = DirectCast(hwrRequest.GetResponse(), HttpWebResponse)
                    Using srReader As New StreamReader(hwrResponse.GetResponseStream())
                        sReturn = srReader.ReadToEnd()
                    End Using

                    ' Keep the session: without this a login POST is forgotten by the next request.
                    StoreResponseCookies(hwrRequest, hwrResponse)
                    Me.Referer = hwrResponse.ResponseUri.AbsoluteUri
                End Using
            Catch ex As Exception
                m_sLastError = ex.Message
                sReturn &= ex.ToString & vbCrLf
            End Try

            Return sReturn
        End Function

        ''' <summary>
        ''' Sends a GET request to <see cref="URL"/> and returns the raw response bytes.
        ''' </summary>
        ''' <param name="Data">Not used by this method; kept so existing callers keep compiling.</param>
        ''' <param name="bAutoRedirect">Value assigned to HttpWebRequest.AllowAutoRedirect.</param>
        ''' <returns>
        ''' The response body, or Nothing when the request failed before the body was read
        ''' (the reason is then available through <see cref="LastError"/>).
        ''' </returns>
        Public Function RequestBinaryData(ByVal Data As String, Optional ByVal bAutoRedirect As Boolean = False) As Byte()
            Dim bytesRead As Byte() = Nothing

            m_sLastError = String.Empty
            Try
                Dim hwrRequest As HttpWebRequest = CreateRequest(bAutoRedirect)

                ' Dispose the response and its stream so the connection is released.
                Using hwrResponse As HttpWebResponse = DirectCast(hwrRequest.GetResponse(), HttpWebResponse)
                    Using stResponse As Stream = hwrResponse.GetResponseStream()
                        bytesRead = ReadAsByteArray(stResponse)
                    End Using

                    StoreResponseCookies(hwrRequest, hwrResponse)
                    Me.Referer = hwrResponse.ResponseUri.AbsoluteUri
                End Using
            Catch ex As Exception
                ' Best effort: callers detect failure through the Nothing return value.
                m_sLastError = ex.Message
            End Try

            Return bytesRead
        End Function

        ''' <summary>
        ''' Builds an HttpWebRequest for <see cref="URL"/> with the shared headers and the
        ''' cookies collected so far. Throws if the URL is not a valid HTTP(S) URI.
        ''' </summary>
        Private Function CreateRequest(ByVal bAutoRedirect As Boolean) As HttpWebRequest
            Dim hwrRequest As HttpWebRequest = DirectCast(WebRequest.Create(New Uri(m_sURL)), HttpWebRequest)

            hwrRequest.Referer = m_sReferer
            hwrRequest.UserAgent = m_sUserAgent
            hwrRequest.AllowAutoRedirect = bAutoRedirect
            hwrRequest.AllowWriteStreamBuffering = True
            hwrRequest.KeepAlive = False
            hwrRequest.Accept = ACCEPT_HEADER

            ' A container is always attached so the framework records the cookies the
            ' server sets; StoreResponseCookies reads them back after the response.
            hwrRequest.CookieContainer = New CookieContainer()
            If m_ccCookies.Count > 0 Then
                hwrRequest.CookieContainer.Add(m_ccCookies)
            End If

            Return hwrRequest
        End Function

        ''' <summary>
        ''' Copies the cookies that apply to the response URI from the request's cookie
        ''' container into <see cref="Cookies"/>.
        ''' </summary>
        Private Sub StoreResponseCookies(ByVal hwrRequest As HttpWebRequest, ByVal hwrResponse As HttpWebResponse)
            m_ccCookies.Add(hwrRequest.CookieContainer.GetCookies(hwrResponse.ResponseUri))
        End Sub

    #Region " IO Supporting "

        ''' <summary>
        ''' Downloads <paramref name="sImagePath"/> (with an empty Referer) and returns its bytes,
        ''' or Nothing when the download failed. Overwrites <see cref="URL"/> and <see cref="Referer"/>.
        ''' </summary>
        Public Function SaveImageToByte(ByVal sImagePath As String) As Byte()
            m_sURL = sImagePath
            m_sReferer = ""

            Return RequestBinaryData(String.Empty)
        End Function

        ''' <summary>
        ''' Downloads <paramref name="sUrl"/> (with an empty Referer) and writes the bytes to
        ''' <paramref name="sFullPathPlusFilename"/>. Overwrites <see cref="URL"/> and <see cref="Referer"/>.
        ''' </summary>
        ''' <returns>True only when the download succeeded and the file was written.</returns>
        Public Function SaveUrlToFile(ByVal sUrl As String, ByVal sFullPathPlusFilename As String) As Boolean
            m_sURL = sUrl
            m_sReferer = ""

            Dim baImage() As Byte = RequestBinaryData(String.Empty)

            ' RequestBinaryData returns Nothing on failure; do not dereference it.
            If baImage Is Nothing Then
                Return False
            End If

            Return SaveDataToFile(baImage, sFullPathPlusFilename)
        End Function

        ''' <summary>Reads <paramref name="stStream"/> to its end and returns everything read.</summary>
        Private Function ReadAsByteArray(ByVal stStream As Stream) As Byte()
            ' VB array bounds are inclusive, so this allocates exactly MAX_BUF bytes.
            Dim baBytes(MAX_BUF - 1) As Byte

            Using msBuffer As New MemoryStream()
                Dim nBytesRead As Integer = stStream.Read(baBytes, 0, baBytes.Length)

                While nBytesRead > 0
                    msBuffer.Write(baBytes, 0, nBytesRead)
                    nBytesRead = stStream.Read(baBytes, 0, baBytes.Length)
                End While

                Return msBuffer.ToArray()
            End Using
        End Function

        ''' <summary>
        ''' Writes <paramref name="baData"/> to <paramref name="FilePath"/>, replacing any existing file.
        ''' </summary>
        ''' <returns>
        ''' True when the file was written; False when the data is Nothing or the write failed
        ''' (the reason is then available through <see cref="LastError"/>).
        ''' </returns>
        Protected Function SaveDataToFile( _
                 ByVal baData As Byte(), _
                 ByVal FilePath As String) As Boolean
            If baData Is Nothing Then
                m_sLastError = "No data to save."
                Return False
            End If

            Try
                ' Using closes the file handle even when Write throws.
                Using os As New FileStream(FilePath, FileMode.Create)
                    os.Write(baData, 0, baData.Length)
                End Using

                Return True
            Catch ex As Exception
                m_sLastError = ex.Message
                Return False
            End Try
        End Function

    #End Region
    End Class
    Then, from the page where you want to make the post, do something like this:
    Code:
    ' Example: submit a login form with WebPost and capture the returned page.
    Dim clsWebPost As New WebPost
    Dim sPOSTData As String
    Dim sReturnCode As String

    ' A POST body has no leading "?" (that belongs to query strings), and only the
    ' VALUES are URL-encoded: encoding the whole string would also escape the "="
    ' and "&" separators and the server could no longer parse the form fields.
    sPOSTData = "Username=" & System.Web.HttpUtility.UrlEncode("xxxx") & _
                "&Password=" & System.Web.HttpUtility.UrlEncode("yyyyy")

    clsWebPost.URL = "http://www.pagetopostto.com"
    sReturnCode = clsWebPost.Request(sPOSTData, True)
    Please note that the URL the form posts to may not be the same as the URL of the page that contains the form. You may have to view the page source, or use a tool such as Fiddler, to see where the form actually posts to and the exact format in which the post data is sent, and then adjust the code accordingly.

  3. #3

    Thread Starter
    Junior Member
    Join Date
    May 2007
    Posts
    31

    Resolved [RESOLVED]: Webpage Login+scraping with httpWebRequest

    lleemon - Thanksssss a loooooooot...

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  



Click Here to Expand Forum to Full Width