I have got it working using iTextSharp. My code is below in case anybody else finds it useful. It will extract the text within the left half of a PDF page. You would just change the rectangle dimensions to suit your own region:
VB.NET Code:
'iTextSharp Imports
Imports iTextSharp.text
Imports iTextSharp.text.pdf
Imports iTextSharp.text.pdf.parser
''' <summary>
''' Extracts the text located in the left half of a single page of a PDF file.
''' </summary>
''' <param name="PDFPath">Path of the PDF file to open.</param>
''' <param name="PageNo">Number of the page to read, as expected by PdfReader (the first page is 1).</param>
''' <returns>
''' The text found inside the region, or Nothing when the file could not be read
''' (in which case the error message is shown to the user in a message box).
''' </returns>
Public Function GetPDFTextFromRectangle(ByVal PDFPath As String, ByVal PageNo As Integer) As String
    Dim Reader As PdfReader = Nothing
    Dim PDFOutput As String = Nothing
    Try
        Reader = New PdfReader(PDFPath)

        'Fetch the page size once and read both dimensions from it
        Dim PageSize As iTextSharp.text.Rectangle = Reader.GetPageSize(PageNo)
        Dim PageHeight As Single = PageSize.Height
        Dim PageWidth As Single = PageSize.Width

        'Rectangle representing the area that contains the text. Parameters:
        ' Bottom-Left-X
        ' Bottom-Left-Y
        ' Top-Right-X
        ' Top-Right-Y
        'PDF coordinates start at the bottom-left corner of the page, so the
        'left half of the page runs from (0, 0) to (PageWidth / 2, PageHeight).
        'Change these four values to target a different region.
        Dim PageRect As New iTextSharp.text.Rectangle(0, 0, PageWidth / 2, PageHeight)

        'Required Filter and Strategy to extract text
        Dim Filter As RenderFilter = New RegionTextRenderFilter(PageRect)
        Dim Strategy As ITextExtractionStrategy = New FilteredTextRenderListener( _
            New LocationTextExtractionStrategy, _
            Filter)

        'Extract the text from the rectangle region of the given page number
        PDFOutput = PdfTextExtractor.GetTextFromPage(Reader, PageNo, Strategy)
    Catch Ex As Exception
        MessageBox.Show(Ex.Message)
    Finally
        'Reader is still Nothing when the PdfReader constructor itself failed,
        'so only close it when it was actually created.
        If Reader IsNot Nothing Then
            Reader.Close()
        End If
    End Try

    'Returning after the Try block: VB.NET does not allow Return inside Finally.
    Return PDFOutput
End Function