|
-
Mar 16th, 2021, 09:28 AM
#1
Thread Starter
Addicted Member
Help needed translating c# code
Hi
I've been trying to translate some example itext code into vb.net for a few days now.
Link is here
The code is meant to grab text from a specified area of a PDF, and only if that text is in a specified font. I'm trying to use this to separate out some text that has other text beneath it in a different font.
The code on the page is available in C# and Java, but I only know VB.NET, so I've had to copy and paste it into online translators such as Carlosag. That has taken me to the point where I can extract by font, but the specified area is being ignored and I'm getting all the text in the PDF that uses the specified font.
I could post what I've done so far, but I thought it might be better for someone to look at this from scratch. If it would help to post what I've done I'll happily do so.
Please help - I'm really struggling with this as I don't have sufficient knowledge of either c# OR itext!
The c# code is below.
Code:
using System;
using System.IO;
using iText.Kernel.Font;
using iText.Kernel.Geom;
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Kernel.Pdf.Canvas.Parser.Data;
using iText.Kernel.Pdf.Canvas.Parser.Filter;
using iText.Kernel.Pdf.Canvas.Parser.Listener;
namespace iText.Samples.Sandbox.Parse
{
public class ParseCustom
{
public static readonly String DEST = "results/txt/parse_custom.txt";
public static readonly String SRC = "../../../resources/pdfs/nameddestinations.pdf";
public static void Main(String[] args)
{
FileInfo file = new FileInfo(DEST);
file.Directory.Create();
new ParseCustom().ManipulatePdf(DEST);
}
public virtual void ManipulatePdf(String dest)
{
PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC));
Rectangle rect = new Rectangle(36, 750, 523, 56);
CustomFontFilter fontFilter = new CustomFontFilter(rect);
FilteredEventListener listener = new FilteredEventListener();
// Create a text extraction renderer
LocationTextExtractionStrategy extractionStrategy = listener
.AttachEventListener(new LocationTextExtractionStrategy(), fontFilter);
// Note: If you want to re-use the PdfCanvasProcessor, you must call PdfCanvasProcessor.reset()
new PdfCanvasProcessor(listener).ProcessPageContent(pdfDoc.GetFirstPage());
// Get the resultant text after applying the custom filter
String actualText = extractionStrategy.GetResultantText();
pdfDoc.Close();
// See the resultant text in the console
Console.Out.WriteLine(actualText);
using (StreamWriter writer = new StreamWriter(dest))
{
writer.Write(actualText);
}
}
// The custom filter filters only the text of which the font name ends with Bold or Oblique.
protected class CustomFontFilter : TextRegionEventFilter
{
public CustomFontFilter(Rectangle filterRect)
: base(filterRect)
{
}
public override bool Accept(IEventData data, EventType type)
{
if (type.Equals(EventType.RENDER_TEXT))
{
TextRenderInfo renderInfo = (TextRenderInfo) data;
PdfFont font = renderInfo.GetFont();
if (null != font)
{
String fontName = font.GetFontProgram().GetFontNames().GetFontName();
return fontName.EndsWith("Bold") || fontName.EndsWith("Oblique");
}
}
return false;
}
}
}
}
Thanks
Tags for this Thread
Posting Permissions
- You may not post new threads
- You may not post replies
- You may not post attachments
- You may not edit your posts
-
Forum Rules
|
Click Here to Expand Forum to Full Width
|