TextMap.cs
 // // This code is part of Document Solutions for PDF demos. // Copyright (c) MESCIUS inc. All rights reserved. // using System; using System.IO; using System.Drawing; using System.Numerics; using System.Collections.Generic; using System.Linq; using GrapeCity.Documents.Text; using GrapeCity.Documents.Drawing; using GrapeCity.Documents.Pdf; using GrapeCity.Documents.Pdf.Annotations; using GrapeCity.Documents.Pdf.Graphics; using GrapeCity.Documents.Pdf.TextMap; namespace DsPdfWeb.Demos { // This sample shows how to use the text map for a page in a PDF // to find geometric positions of text lines on the page, // and to locate the text at a specific position. // The PDF used in this sample was created by TimeSheet. public class TextMap { public int CreatePDF(Stream stream) { var doc = new GcPdfDocument(); var page = doc.NewPage(); var rc = Common.Util.AddNote( "This sample loads the PDF created by the TimeSheet sample into a temporary GcPdfDocument, " + "gets the text map for the first page, and prints out the coordinates and texts of all " + "line fragments in the map. " + "It also uses the map's HitTest method to find the text at specific coordinates in the PDF " + "and prints the result. " + "The original TimeSheet.pdf used by this sample (consisting of 1 page) is appended for reference.", page); // Setup text formatting and layout: var tf = new TextFormat() { Font = StandardFonts.Times, FontSize = 13 }; var tfFound = new TextFormat() { Font = StandardFonts.TimesBold, FontSize = 14, ForeColor = Color.DarkBlue }; var tl = new TextLayout(72) { MaxWidth = doc.PageSize.Width, MaxHeight = doc.PageSize.Height, MarginAll = rc.Left, MarginTop = rc.Bottom + 36, TabStops = [ new (72 * 2) ], }; var to = new TextSplitOptions(tl) { MinLinesInFirstParagraph = 2, MinLinesInLastParagraph = 2, RestMarginTop = rc.Left, }; // Open an arbitrary PDF, load it into a temp document and use the map to find some texts: using var fs = File.OpenRead(Path.Combine("Resources", "PDFs", "TimeSheet.pdf")); var doc1 = new GcPdfDocument(); doc1.Load(fs); var tmap = doc1.Pages[0].GetTextMap(); // We retrieve the text at a specific (known to us) geometric location on the page: float tx0 = 2.1f, ty0 = 3.37f, tx1 = 3.1f, ty1 = 3.4f; HitTestInfo htiFrom = tmap.HitTest(tx0 * 72, ty0 * 72); HitTestInfo htiTo = tmap.HitTest(tx1 * 72, ty1 * 72); tmap.GetFragment(htiFrom.Pos, htiTo.Pos, out TextMapFragment range1, out string text1); tl.AppendLine($"Looked for text in rectangle x={tx0:F2}\", y={ty0:F2}\", width={tx1 - tx0:F2}\", height={ty1 - ty0:F2}\", found:", tf); tl.AppendLine(text1, tfFound); tl.AppendLine(); // Get all text fragments and their locations on the page: tl.AppendLine("List of all texts found on the page", tf); tmap.GetFragment(out TextMapFragment range, out string text); foreach (TextLineFragment tlf in range) { var coords = tmap.GetCoords(tlf); tl.Append($"Text at ({coords.B.X / 72:F2}\",{coords.B.Y / 72:F2}\"):\t", tf); tl.AppendLine(tmap.GetText(tlf), tfFound); } // Print the results: tl.PerformLayout(true); while (true) { // 'rest' will accept the text that did not fit: var splitResult = tl.Split(to, out TextLayout rest); doc.Pages.Last.Graphics.DrawTextLayout(tl, PointF.Empty); if (splitResult != SplitResult.Split) break; tl = rest; doc.NewPage(); } // Append the original document for reference: doc.MergeWithDocument(doc1, new MergeDocumentOptions()); // Done: doc.Save(stream); return doc.Pages.Count; } } }