Results 1 to 22 of 22

Thread: VB.Net - Merge Pdf Files and Add Bookmarks to It (Using PDFBox)

Threaded View

  1. #1

    Thread Starter
    PowerPoster stanav's Avatar
    Join Date
    Jul 2006
    Location
    Providence, RI - USA
    Posts
    9,290

    VB.Net - Merge Pdf Files and Add Bookmarks to It (Using PDFBox)

    Hello all,
    I was recently working on a job assignment dealing with pdf files. My company produces hundreds of daily reports in pdf format, where each report is for a specific division/sub-company. Some top executives want to look at a single report that contains all divisions/sub-companies instead of looking at each one separately, so my job is to merge those reports together into a single pdf file with bookmarks for easy navigation. Originally, I used the Acrobat COM object approach, but management didn't want to spend $ to buy a full version of Adobe Acrobat for every PC that runs my program, so I had to rewrite it without relying on Acrobat. I then found the open source PDFBox package which can be downloaded here... Once you have the package downloaded and unzipped to a directory on your local machine, you need to add the following references to your project:
    Code:
    IKVM.GNU.Classpath
    IKVM.Runtime
    PDFBox-0.7.3
    To make the story short, here are the steps I did:
    1. Create a list of pdf files to be merged.
    2. Merge those pdf files into a temp file. The merging order will follow the order of the items in the list.
    3. Create a data table to hold bookmark data. Each datarow contains the bookmark title and the page number it points to.
    4. Open the merged temp file and insert bookmarks into it using info from the bookmark data table, then save it to a new file.
    5. If all successful, delete the temp file

    Code of interest:
    vb Code:
    ''' <summary>
    ''' Merges the given pdf files, in list order, into a single output file
    ''' using PDFBox's PDFMergerUtility.
    ''' </summary>
    ''' <param name="pdfFileList">Paths of the source pdf files. At least two entries are required.</param>
    ''' <param name="outputFileFullName">Path of the merged pdf file to create.</param>
    ''' <returns>
    ''' True when the merge completed; False when fewer than two files were
    ''' supplied or the merge raised an exception (the reason is logged).
    ''' </returns>
    Private Function MergePdfFiles(ByVal pdfFileList As List(Of String), _
                                   ByVal outputFileFullName As String) As Boolean
        'Guard first: a missing list is treated like an empty one rather than
        'raising an unhandled NullReferenceException outside the Try block.
        If pdfFileList Is Nothing OrElse pdfFileList.Count < 2 Then
            WriteToLog("MergePDFFile(" & outputFileFullName & "): at least two pdf files are required.")
            Return False
        End If

        Try
            Dim pdfMerger As New PDFMergerUtility()
            'Set output destination
            pdfMerger.setDestinationFileName(outputFileFullName)
            'Sources are added in list order, which is the order they are merged in
            For Each sourceFile As String In pdfFileList
                pdfMerger.addSource(sourceFile)
            Next
            'Merge the documents
            pdfMerger.mergeDocuments()
            Return True
        Catch ex As Exception
            WriteToLog("MergePDFFile(" & outputFileFullName & "): " & ex.Message)
            Return False
        End Try
    End Function
    28.  
    ''' <summary>
    ''' Builds the bookmark table for a merged pdf: one row per source file,
    ''' holding the bookmark title (the file name without its extension) and
    ''' the zero-based index of the page at which that file starts.
    ''' </summary>
    ''' <param name="pdfFileList">Source pdf files, in the order they are merged.</param>
    ''' <returns>
    ''' A table with the columns "BookmarkTitle" (String) and "PageNumber"
    ''' (Integer), or Nothing when an exception occurred (the error is logged).
    ''' </returns>
    Private Function CreateBookmarkDataTable(ByVal pdfFileList As List(Of String)) As DataTable
        Dim table As New DataTable
        Try
            table.Columns.Add("BookmarkTitle", GetType(String))
            table.Columns.Add("PageNumber", GetType(Integer))

            'Running total of the pages contributed by the files handled so far;
            'it is the index of the first page of the file handled next.
            Dim firstPageIndex As Integer = 0
            For Each pdfFile As String In pdfFileList
                Dim title As String = Path.GetFileNameWithoutExtension(pdfFile)
                Dim entry As DataRow = table.NewRow()
                entry("BookmarkTitle") = title
                entry("PageNumber") = firstPageIndex
                table.Rows.Add(entry)
                firstPageIndex += GetPageCount(pdfFile)
            Next
        Catch ex As Exception
            WriteToLog("CreateBookmarkDataTable(): " & ex.Message)
            Return Nothing
        End Try
        Return table
    End Function
    54.  
    ''' <summary>
    ''' Loads a pdf file and reports how many pages it contains.
    ''' </summary>
    ''' <param name="pdfFile">Path of the pdf file to inspect.</param>
    ''' <returns>
    ''' The number of pages, or 0 when the file could not be loaded or read
    ''' (the error is logged).
    ''' </returns>
    Private Function GetPageCount(ByVal pdfFile As String) As Integer
        Dim document As PDDocument = Nothing
        Try
            document = PDDocument.load(pdfFile)
            Return document.getNumberOfPages()
        Catch ex As Exception
            WriteToLog("GetPageCount(" & pdfFile & "): " & ex.Message)
            Return 0
        Finally
            'Close the document whether or not the page count was obtained
            If document IsNot Nothing Then
                document.close()
            End If
        End Try
    End Function
    71.  
    ''' <summary>
    ''' Adds one bookmark per row of <paramref name="bookmarkTable"/> to the
    ''' given pdf file, grouped under an "All Pages" outline node, and saves
    ''' the result to a new file whose name is the input file name with the
    ''' "temp_" marker removed (same directory).
    ''' </summary>
    ''' <param name="pdfFile">Path of the pdf file to read.</param>
    ''' <param name="bookmarkTable">
    ''' Rows with a "BookmarkTitle" (String) and a "PageNumber" (Integer) column;
    ''' the page number is used as a zero-based index into the document's pages.
    ''' </param>
    ''' <returns>
    ''' True when the bookmarked file was saved; False when the table is missing
    ''' or empty, the document is encrypted, or an exception occurred (the
    ''' reason is logged in each case).
    ''' </returns>
    Private Function AddBookMarks(ByVal pdfFile As String, _
                                  ByVal bookmarkTable As DataTable) As Boolean
        'A missing table is handled like an empty one instead of raising an
        'unhandled NullReferenceException before the Try block is entered.
        If bookmarkTable Is Nothing OrElse bookmarkTable.Rows.Count = 0 Then
            WriteToLog("Can't add bookmarks to <" & pdfFile & "> because BookmarkTable has no data.")
            Return False
        End If

        Dim PdfDoc As PDDocument = Nothing
        Try
            'Set the output file full path. Only the file name is stripped of the
            '"temp_" marker, so a directory that happens to contain "temp_" is
            'left untouched.
            Dim outFile As String = Path.Combine(Path.GetDirectoryName(pdfFile), _
                                                 Path.GetFileName(pdfFile).Replace("temp_", ""))
            'Load the input pdf file
            PdfDoc = PDDocument.load(pdfFile)
            If PdfDoc.isEncrypted() Then
                WriteToLog("Can't add bookmarks to <" & pdfFile & "> because the document is encrypted.")
                Return False
            End If

            'Create new document outline and assign it to the pdf document
            Dim outline As PDDocumentOutline = New PDDocumentOutline()
            PdfDoc.getDocumentCatalog().setDocumentOutline(outline)

            'Create the parent outline item that holds every bookmark
            Dim pagesOutline As PDOutlineItem = New PDOutlineItem()
            pagesOutline.setTitle("All Pages")
            outline.appendChild(pagesOutline)

            'Get the list of pages in the document
            Dim pages As List = PdfDoc.getDocumentCatalog().getAllPages()

            'Loop thru the bookmark datatable and add bookmarks to the document accordingly
            For Each row As DataRow In bookmarkTable.Rows
                Dim pageNumber As Integer = CInt(row.Item("PageNumber"))
                Dim bookmarkTitle As String = CStr(row.Item("BookmarkTitle"))
                'Get the page at pageNumber from pages list; an out-of-range
                'index throws and is handled by the Catch below
                Dim page As PDPage = CType(pages.get(pageNumber), PDPage)
                Dim dest As PDPageFitWidthDestination = New PDPageFitWidthDestination()
                dest.setPage(page)
                'Then set bookmark to it
                Dim bookmark As PDOutlineItem = New PDOutlineItem()
                bookmark.setDestination(dest)
                bookmark.setTitle(bookmarkTitle)
                'Add this bookmark to the document's outline
                pagesOutline.appendChild(bookmark)
            Next

            'Expand the bookmark tree
            pagesOutline.openNode()
            outline.openNode()
            'Save the document to a file
            PdfDoc.save(outFile)
            Return True
        Catch ex As Exception
            WriteToLog("AddBookmarks(" & pdfFile & "): " & ex.Message)
            Return False
        Finally
            If PdfDoc IsNot Nothing Then
                PdfDoc.close()
            End If
        End Try
    End Function

    The full source code is attached (it's a console application)
    Attached Files Attached Files
    Last edited by stanav; Jun 26th, 2007 at 08:33 AM.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  



Click Here to Expand Forum to Full Width