Imports System.Net
Imports System.IO
Imports System.Text.RegularExpressions
''' <summary>
''' Console sample: downloads the page at a user-supplied URL and prints every
''' link found in an href attribute, resolving relative links against that URL.
''' </summary>
Public Module ExtractURITest

    ' Matches an href attribute and captures its value in the "match" group.
    ' The value may be double-quoted, single-quoted, or unquoted; an unquoted
    ' value ends at whitespace, a quote character, or the closing ">" of the tag.
    Private ReadOnly HrefRegex As New Regex( _
        "href\s*=\s*(?:""(?<match>[^""]*)""|'(?<match>[^']*)'|(?<match>[^\s""'>]+))", _
        RegexOptions.IgnoreCase Or RegexOptions.Compiled)

    ''' <summary>
    ''' Prompts for a URL, downloads the page, and writes each discovered link to
    ''' the console. Any failure while parsing the URL or downloading the page is
    ''' printed and ends the program after the user presses Enter.
    ''' </summary>
    Public Sub Main()
        Console.WriteLine("Enter a URL, and press Enter (must start with http://).")
        Dim Url As String = Console.ReadLine()
        Dim BaseUri As Uri
        Dim Page As String
        Try
            BaseUri = New Uri(Url)
            ' Create the request.
            Dim PageRequest As HttpWebRequest = CType(WebRequest.Create(Url), HttpWebRequest)
            ' Get the response. This is where most of the time is spent.
            ' The Using blocks make sure the response and its stream are
            ' released even if reading fails part-way through.
            Using PageResponse As WebResponse = PageRequest.GetResponse()
                Console.WriteLine("Response received.")
                Using Reader As New StreamReader(PageResponse.GetResponseStream())
                    Page = Reader.ReadToEnd()
                End Using
            End Using
        Catch Err As Exception
            Console.WriteLine(Err.ToString())
            Console.ReadLine()
            Return
        End Try

        ' Find and display all the href matches.
        Dim HrefMatch As Match = HrefRegex.Match(Page)
        Do While HrefMatch.Success
            Dim Link As String = HrefMatch.Groups("match").Value
            ' Skip empty values (href="") and in-page bookmarks (#anchor).
            If Link.Length > 0 AndAlso Not Link.StartsWith("#", StringComparison.Ordinal) Then
                Console.WriteLine(ResolveLink(BaseUri, Link))
            End If
            HrefMatch = HrefMatch.NextMatch()
        Loop
        Console.ReadLine()
    End Sub

    ''' <summary>
    ''' Returns the text to display for a link: the link itself when it is already
    ''' fully qualified, otherwise the link resolved against <paramref name="BaseUri"/>.
    ''' </summary>
    ''' <param name="BaseUri">URI of the page the link was found on.</param>
    ''' <param name="Link">Raw, non-empty href value taken from the page.</param>
    ''' <returns>
    ''' The absolute form of the link, or the raw link when it cannot be combined
    ''' with the base URI.
    ''' </returns>
    Private Function ResolveLink(ByVal BaseUri As Uri, ByVal Link As String) As String
        If IsAbsoluteLink(Link) Then Return Link

        ' TryCreate is used instead of the Uri constructor so that a single
        ' malformed href does not abort the whole listing with an exception.
        Dim Resolved As Uri = Nothing
        If Uri.TryCreate(BaseUri, Link, Resolved) Then Return Resolved.ToString()

        ' The link cannot be resolved; show it unchanged rather than dropping it.
        Return Link
    End Function

    ''' <summary>
    ''' Determines whether a link is fully qualified by comparing its prefix,
    ''' ignoring case, against a few known schemes (http, https, file).
    ''' </summary>
    ''' <param name="Link">Raw href value taken from the page.</param>
    ''' <returns>True when the link starts with one of the known schemes.</returns>
    Private Function IsAbsoluteLink(ByVal Link As String) As Boolean
        ' (You could compare it against additional schemes here.)
        Dim Schemes() As String = {Uri.UriSchemeHttp, Uri.UriSchemeHttps, Uri.UriSchemeFile}
        For Each Scheme As String In Schemes
            If Link.StartsWith(Scheme & Uri.SchemeDelimiter, StringComparison.OrdinalIgnoreCase) Then
                Return True
            End If
        Next
        Return False
    End Function
End Module