Hi

I've been trying to translate some iText example code into VB.NET for a few days now.

Link is here

The code is meant to grab text from a specified area of a PDF, and only if that text is in a specified font. I'm trying to use this to separate out some text that has other text beneath it in a different font.

The code on the page is available in C# and Java, but I only know VB.NET, so I've had to copy and paste it into online translators like carloslag. That has taken me to the point where I can extract by font, but the specified area is being ignored: I'm getting all the text in the PDF that uses the specified font.

I could post what I've done so far, but I thought it might be better for someone to look at this from scratch. If it would help to post what I've done I'll happily do so.

Please help - I'm really struggling with this as I don't have sufficient knowledge of either c# OR itext!

The c# code is below.

Code:
using System;
using System.IO;
using iText.Kernel.Font;
using iText.Kernel.Geom;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Kernel.Pdf.Canvas.Parser.Data;
using iText.Kernel.Pdf.Canvas.Parser.Filter;
using iText.Kernel.Pdf.Canvas.Parser.Listener;

namespace iText.Samples.Sandbox.Parse
{
    /// <summary>
    /// Sample that extracts text from the first page of <see cref="SRC"/>, keeping only
    /// text that lies inside a given rectangle AND is rendered with a font whose name
    /// ends with "Bold" or "Oblique". The result is echoed to the console and written
    /// to a text file.
    /// </summary>
    public class ParseCustom
    {
        /// <summary>Path of the text file the extracted text is written to.</summary>
        public static readonly String DEST = "results/txt/parse_custom.txt";

        /// <summary>Path of the PDF document that is parsed.</summary>
        public static readonly String SRC = "../../../resources/pdfs/nameddestinations.pdf";

        /// <summary>
        /// Entry point: makes sure the output directory exists, then runs the extraction.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(String[] args)
        {
            FileInfo file = new FileInfo(DEST);
            file.Directory.Create();

            new ParseCustom().ManipulatePdf(DEST);
        }

        /// <summary>
        /// Parses the first page of <see cref="SRC"/> through a <see cref="CustomFontFilter"/>,
        /// prints the filtered text to the console and writes it to <paramref name="dest"/>.
        /// </summary>
        /// <param name="dest">Path of the text file to create or overwrite.</param>
        public virtual void ManipulatePdf(String dest)
        {
            String actualText;

            PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC));
            try
            {
                // Region of the page from which text is accepted.
                Rectangle rect = new Rectangle(36, 750, 523, 56);
                CustomFontFilter fontFilter = new CustomFontFilter(rect);
                FilteredEventListener listener = new FilteredEventListener();

                // Create a text extraction renderer that only receives events accepted by the filter
                LocationTextExtractionStrategy extractionStrategy = listener
                    .AttachEventListener(new LocationTextExtractionStrategy(), fontFilter);

                // Note: If you want to re-use the PdfCanvasProcessor, you must call PdfCanvasProcessor.reset()
                new PdfCanvasProcessor(listener).ProcessPageContent(pdfDoc.GetFirstPage());

                // Get the resultant text after applying the custom filter
                actualText = extractionStrategy.GetResultantText();
            }
            finally
            {
                // Close the document even when parsing throws, so the source file is released.
                pdfDoc.Close();
            }

            // See the resultant text in the console
            Console.Out.WriteLine(actualText);

            using (StreamWriter writer = new StreamWriter(dest))
            {
                writer.Write(actualText);
            }
        }

        /// <summary>
        /// Filter that accepts a text-render event only when it passes the region check of
        /// <see cref="TextRegionEventFilter"/> for the rectangle given to the constructor
        /// AND the name of its font ends with "Bold" or "Oblique".
        /// </summary>
        protected class CustomFontFilter : TextRegionEventFilter
        {
            /// <summary>Creates the filter for the given region.</summary>
            /// <param name="filterRect">Rectangle the accepted text must fall into.</param>
            public CustomFontFilter(Rectangle filterRect)
                : base(filterRect)
            {
            }

            /// <summary>
            /// Decides whether an event is forwarded to the attached listener.
            /// </summary>
            /// <param name="data">Event payload; a <see cref="TextRenderInfo"/> for text events.</param>
            /// <param name="type">Kind of event being reported.</param>
            /// <returns>
            /// <c>true</c> only for <c>RENDER_TEXT</c> events that the base region filter accepts
            /// and whose font name ends with "Bold" or "Oblique"; otherwise <c>false</c>.
            /// </returns>
            public override bool Accept(IEventData data, EventType type)
            {
                if (!type.Equals(EventType.RENDER_TEXT))
                {
                    return false;
                }

                // Delegate to the base class so the rectangle passed to the constructor is
                // actually applied. Without this call the region was never consulted and
                // every text chunk with a matching font was accepted, wherever it was on the page.
                if (!base.Accept(data, type))
                {
                    return false;
                }

                TextRenderInfo renderInfo = (TextRenderInfo) data;
                PdfFont font = renderInfo.GetFont();
                if (null == font)
                {
                    return false;
                }

                String fontName = font.GetFontProgram().GetFontNames().GetFontName();
                if (null == fontName)
                {
                    return false;
                }

                // Font names are identifiers, not linguistic text: compare ordinally.
                return fontName.EndsWith("Bold", StringComparison.Ordinal)
                    || fontName.EndsWith("Oblique", StringComparison.Ordinal);
            }
        }
    }
}
Thanks