TextMap.vb
''
'' This code is part of Document Solutions for PDF demos.
'' Copyright (c) MESCIUS inc. All rights reserved.
''
Imports System.IO
Imports System.Drawing
Imports System.Numerics
Imports System.Collections.Generic
Imports System.Linq
Imports GrapeCity.Documents.Text
Imports GrapeCity.Documents.Drawing
Imports GrapeCity.Documents.Pdf
Imports GrapeCity.Documents.Pdf.Annotations
Imports GrapeCity.Documents.Pdf.Graphics
Imports GrapeCity.Documents.Pdf.TextMap

'' This sample shows how to use the text map for a page in a PDF
'' to find geometric positions of text lines on the page,
'' and to locate the text at a specific position.
'' The PDF used in this sample was created by TimeSheet.
Public Class TextMap
    '' Builds the sample PDF and saves it into 'stream'.
    ''
    '' The method:
    ''   1. adds an explanatory note to the first page of a new document;
    ''   2. loads Resources/PDFs/TimeSheet.pdf into a temporary document and
    ''      gets the text map of its first page;
    ''   3. prints the text found between two hit-tested points, followed by
    ''      every text fragment in the map with its coordinates in inches;
    ''   4. splits the printed text over as many pages as needed;
    ''   5. appends the temporary document and saves the result.
    ''
    '' Parameters:
    ''   stream - the stream that the resulting PDF is saved to.
    '' Returns:
    ''   the number of pages in the resulting document.
    Function CreatePDF(ByVal stream As Stream) As Integer
        Dim doc = New GcPdfDocument()
        Dim page = doc.NewPage()

        Dim rc = Util.AddNote(
            "This sample loads the PDF created by the TimeSheet sample into a temporary GcPdfDocument, " +
            "gets the text map for the first page, and prints out the coordinates and texts of all " +
            "line fragments in the map. " +
            "It also uses the map's HitTest method to find the text at specific coordinates in the PDF " +
            "and prints the result. " +
            "The original TimeSheet.pdf used by this sample (consisting of 1 page) is appended for reference.",
            page)

        '' Setup text formatting and layout:
        '' 'tf' is used for labels, 'tfFound' for the texts retrieved from the map.
        Dim tf = New TextFormat() With {
            .Font = StandardFonts.Times,
            .FontSize = 13
        }
        Dim tfFound = New TextFormat() With {
            .Font = StandardFonts.TimesBold,
            .FontSize = 14,
            .ForeColor = Color.DarkBlue
        }
        '' The layout covers the whole page; its top margin starts 36 units below the note.
        Dim tl = New TextLayout(72) With {
            .MaxWidth = doc.PageSize.Width,
            .MaxHeight = doc.PageSize.Height,
            .MarginAll = rc.Left,
            .MarginTop = rc.Bottom + 36,
            .TabStops = New List(Of TabStop)() From {New TabStop(72 * 2)}
        }
        '' Split options used when the text does not fit on one page;
        '' continuation pages use the note's left offset as their top margin.
        Dim tso = New TextSplitOptions(tl) With {
            .MinLinesInFirstParagraph = 2,
            .MinLinesInLastParagraph = 2,
            .RestMarginTop = rc.Left
        }

        '' Open an arbitrary PDF, load it into a temp document and use the map to find some texts:
        Using fs = New FileStream(Path.Combine("Resources", "PDFs", "TimeSheet.pdf"), FileMode.Open, FileAccess.Read)
            Dim doc1 = New GcPdfDocument()
            doc1.Load(fs)
            Dim tmap = doc1.Pages(0).GetTextMap()

            '' We retrieve the text at a specific (known to us) geometric location on the page.
            '' The corner coordinates are in inches and are multiplied by 72 for hit testing.
            Dim tx0 = 2.1F, ty0 = 3.37F, tx1 = 3.1F, ty1 = 3.5F
            Dim htiFrom = tmap.HitTest(tx0 * 72, ty0 * 72)
            '' The second corner is (tx1, ty1), matching the rectangle reported below.
            Dim htiTo = tmap.HitTest(tx1 * 72, ty1 * 72)
            Dim range1 As TextMapFragment = Nothing, text1 As String = Nothing
            tmap.GetFragment(htiFrom.Pos, htiTo.Pos, range1, text1)
            '' The format specifier must follow the colon directly: a leading space
            '' would turn it into a custom format that prints no digits at all.
            tl.AppendLine($"Looked for text inside rectangle x={tx0:F2}"", y = {ty0:F2}"", width={tx1 - tx0:F2}"", height = {ty1 - ty0:F2}"", found:", tf)
            tl.AppendLine(text1, tfFound)
            tl.AppendLine()

            '' Get all text fragments and their locations on the page:
            tl.AppendLine("List of all texts found on the page", tf)
            Dim range As TextMapFragment = Nothing, text As String = Nothing
            tmap.GetFragment(range, text)
            For Each tlf In range
                '' Print point B of the fragment's coordinates (divided by 72, i.e. in inches),
                '' then a tab and the fragment's text.
                Dim coords = tmap.GetCoords(tlf)
                tl.Append($"Text at ({coords.B.X / 72:F2}"", {coords.B.Y / 72:F2}""):{vbTab}", tf)
                tl.AppendLine(tmap.GetText(tlf), tfFound)
            Next

            '' Print the results:
            tl.PerformLayout(True)
            While True
                '' 'rest' will accept the text that did not fit:
                Dim rest As TextLayout = Nothing
                Dim splitResult = tl.Split(tso, rest)
                doc.Pages.Last.Graphics.DrawTextLayout(tl, PointF.Empty)
                If splitResult <> SplitResult.Split Then
                    Exit While
                End If
                '' Continue with the remainder on a new page.
                tl = rest
                doc.NewPage()
            End While

            '' Append the original document for reference:
            doc.MergeWithDocument(doc1, New MergeDocumentOptions())

            '' Done. The document is saved while the source file stream is still open
            '' (NOTE(review): presumably the merged pages still read from it - confirm).
            doc.Save(stream)
        End Using
        Return doc.Pages.Count
    End Function
End Class