A long while ago, when I posted the code to extract text from a PDF using iTextSharp, a VBF member asked me to write a function to extract images too. I was busy at the time and didn't dig too deeply into it. Recently, while trying to find a way to extract hyperlinks from a PDF (as requested by another VBF member), I also figured out how to get the images, so I thought I would post the code here to share with everyone.

Note1: You'll need to add a reference to iTextSharp.dll in your project. If you don't already have it, it can be downloaded by Googling for "itextsharp download".

Note2: This code was written targeting the .NET 2.0 Framework. It will still work on .NET 1.x if you replace every occurrence of "List(Of Image)" in the code with an ArrayList.

vb.net Code:
  ''' <summary>
  ''' Extracts the images embedded in a PDF file.
  ''' </summary>
  ''' <param name="sourcePdf">Path of the PDF file to read.</param>
  ''' <returns>
  ''' A list containing one <see cref="Image"/> for every image stream that GDI+ was able
  ''' to decode. The list is empty (never Nothing) when the file holds no decodable images
  ''' or when the file could not be read.
  ''' </returns>
  ''' <remarks>
  ''' Every entry of the PDF cross-reference table is visited and each stream whose
  ''' /Subtype is /Image is handed, still in its stored (raw) form, to
  ''' <see cref="Image.FromStream"/>. Streams that GDI+ cannot decode from their raw bytes
  ''' are skipped silently.
  ''' Errors while opening or parsing the file are reported with a message box and the
  ''' images collected so far are returned.
  ''' The caller owns the returned images and should Dispose them when finished.
  ''' </remarks>
  Public Shared Function ExtractImages(ByVal sourcePdf As String) As List(Of Image)
      Dim imgList As New List(Of Image)

      Dim raf As iTextSharp.text.pdf.RandomAccessFileOrArray = Nothing
      Dim reader As iTextSharp.text.pdf.PdfReader = Nothing

      Try
          raf = New iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdf)
          reader = New iTextSharp.text.pdf.PdfReader(raf, Nothing)

          For i As Integer = 0 To reader.XrefSize - 1
              Dim pdfObj As iTextSharp.text.pdf.PdfObject = reader.GetPdfObject(i)
              If pdfObj Is Nothing OrElse Not pdfObj.IsStream() Then Continue For

              'TryCast instead of a hard cast: a stream of an unexpected type is skipped
              'rather than aborting the extraction of all remaining images.
              Dim imageStream As iTextSharp.text.pdf.PRStream = TryCast(pdfObj, iTextSharp.text.pdf.PRStream)
              If imageStream Is Nothing Then Continue For

              Dim subtype As iTextSharp.text.pdf.PdfObject = imageStream.Get(iTextSharp.text.pdf.PdfName.SUBTYPE)
              If subtype Is Nothing Then Continue For
              If subtype.ToString() <> iTextSharp.text.pdf.PdfName.IMAGE.ToString() Then Continue For

              Dim bytes() As Byte = iTextSharp.text.pdf.PdfReader.GetStreamBytesRaw(imageStream)
              If bytes Is Nothing Then Continue For

              Dim memStream As System.IO.MemoryStream = Nothing
              Try
                  'GDI+ requires the source stream to stay open for the whole lifetime of
                  'the Image, so the stream is deliberately NOT wrapped in a Using block.
                  'A MemoryStream holds no unmanaged resources; it is reclaimed together
                  'with the image.
                  memStream = New System.IO.MemoryStream(bytes)
                  imgList.Add(Image.FromStream(memStream))
              Catch ex As Exception
                  'Most likely the image is in an unsupported format.
                  'Skip it; add your own handling here if you need to know about it.
                  If memStream IsNot Nothing Then memStream.Dispose()
              End Try
          Next
      Catch ex As Exception
          MessageBox.Show(ex.Message)
      Finally
          'Release the file even when reading failed part-way through.
          Try
              If reader IsNot Nothing Then
                  reader.Close()
              ElseIf raf IsNot Nothing Then
                  'The reader was never created, so nothing else will close the file.
                  raf.Close()
              End If
          Catch ex As Exception
              'Best-effort cleanup: a failure to close must not hide the result.
          End Try
      End Try

      Return imgList
  End Function