Generating PDFs from HTML by spawning Chromium per request works initially but fails under load due to concurrency limits, memory issues, and crashing containers. This implementation solves these problems with a persistent browser instance and intelligent page pooling.
Core Benefits
- Eliminates per-request Chromium spawning
- Handles high concurrency with capped resources
- Self-healing on browser crashes
- Seamless ASP.NET Core and Kubernetes integration
Complete Implementation
Production-validated code with all original components preserved.
1. Initialization and Browser Launch
Purpose: Ensures Chromium is ready before handling requests
Key Features: Thread-safe startup with crash protection
/// <summary>
/// Launches Chromium and pre-warms the page pool. Call once at startup, before serving requests.
/// </summary>
public async Task InitializeAsync()
{
    // Exclusive lock: startup must not interleave with RestartBrowserAsync, which takes the same gate.
    await _browserGate.WaitAsync();
    try
    {
        // A repeated call must not launch (and leak) a second Chromium process.
        if (_browser is { IsConnected: true })
        {
            return;
        }

        await StartBrowserAsync();
        await SeedPagePoolAsync(); // Prewarm pages
    }
    finally
    {
        _browserGate.Release();
    }
}

/// <summary>
/// Resolves the Chromium executable (downloading it when PUPPETEER_EXECUTABLE_PATH is unset or
/// points at a missing file), launches the browser, and wires up crash recovery.
/// </summary>
private async Task StartBrowserAsync()
{
    // Handle Chromium download if missing
    var path = Environment.GetEnvironmentVariable("PUPPETEER_EXECUTABLE_PATH");
    if (string.IsNullOrWhiteSpace(path) || !File.Exists(path))
    {
        var fetcher = new BrowserFetcher();
        var revision = await fetcher.DownloadAsync();
        path = revision.GetExecutablePath();
    }

    // Configure for stability and security
    var browser = await Puppeteer.LaunchAsync(new LaunchOptions
    {
        ExecutablePath = path,
        Args =
        [
            "--no-sandbox",
            "--disable-dev-shm-usage", // Critical for Docker
            "--headless=new"           // Modern headless mode
        ]
    });
    _browser = browser;

    // Automatic crash recovery. The lambda is effectively async void, so an exception escaping it
    // would be unhandled; catch and log instead.
    browser.Disconnected += async (_, _) =>
    {
        // Ignore events from a browser that has already been replaced.
        if (!ReferenceEquals(browser, _browser))
        {
            return;
        }

        try
        {
            await RestartBrowserAsync();
        }
        catch (Exception ex)
        {
            log.LogCritical(ex, "Browser restart failed after disconnect");
        }
    };
}

/// <summary>Creates <c>PoolLimit</c> pages concurrently and places them in the pool.</summary>
private async Task SeedPagePoolAsync()
{
    var pageTasks = Enumerable.Range(0, PoolLimit)
        .Select(_ => SpawnPageAsync());
    await Task.WhenAll(pageTasks);
}

/// <summary>
/// Creates one page (up to three attempts, with exponential backoff between attempts) and
/// publishes it to the pool. Rethrows the last creation error when all attempts fail.
/// </summary>
private async Task SpawnPageAsync()
{
    const int maxAttempts = 3;
    IPage? page = null;

    // Only page creation is retried. Pool bookkeeping happens once, after a page exists,
    // so a bookkeeping failure can never cause a second page to be created and queued.
    for (var tries = 1; page is null; tries++)
    {
        try
        {
            page = await _browser.NewPageAsync();
        }
        catch (Exception ex)
        {
            log.LogError(ex, "Page creation failed (attempt {Try})", tries);
            if (tries == maxAttempts)
            {
                throw;
            }

            await Task.Delay(TimeSpan.FromSeconds(Math.Pow(2, tries)));
        }
    }

    // Count the page before it becomes visible to consumers; log the value this increment produced.
    var active = Interlocked.Increment(ref _activePages);
    _pageQueue.Enqueue(page);
    _pageSemaphore.Release();
    log.LogInformation("Page spawned. Active count: {Count}", active);
}
2. Page Pool Management
Purpose: Reuses pages to eliminate creation overhead
Key Features: Health validation and thread-safe operations
/// <summary>
/// Takes a page out of the pool, waiting up to 60 seconds for a free slot.
/// Dead pooled pages are discarded; if none is usable, a replacement page is created
/// while the active-page count is below <c>PoolLimit</c>.
/// </summary>
/// <exception cref="TimeoutException">No slot became free within 60 seconds.</exception>
/// <exception cref="InvalidOperationException">A slot was acquired but no page could be supplied.</exception>
private async Task<IPage> CheckoutPageAsync()
{
    // Timeout after 60 seconds
    if (!await _pageSemaphore.WaitAsync(TimeSpan.FromSeconds(60)))
    {
        throw new TimeoutException("Timed out acquiring page");
    }

    // Remember which browser the slot was acquired against, so a failure below only hands
    // the slot back if that browser is still the live one.
    var owner = _browser;
    try
    {
        // Validate page health
        while (_pageQueue.TryDequeue(out var page))
        {
            if (await IsPageAliveAsync(page))
            {
                return page;
            }

            await DisposePageAsync(page); // Remove unusable pages
        }

        // Create new page if under pool limit
        while (true)
        {
            var current = Volatile.Read(ref _activePages);
            if (current >= PoolLimit)
            {
                break;
            }

            if (Interlocked.CompareExchange(ref _activePages, current + 1, current) == current)
            {
                try
                {
                    var newPage = await owner.NewPageAsync();
                    log.LogInformation("Spawned fallback page. Active count: {Count}", current + 1);
                    return newPage;
                }
                catch
                {
                    Interlocked.Decrement(ref _activePages);
                    throw;
                }
            }
        }

        throw new InvalidOperationException("Pool exhausted");
    }
    catch
    {
        // No page was handed out, so the slot taken above must not be lost.
        ReleaseSlot(owner);
        throw;
    }
}

/// <summary>
/// Hands a checked-out page back. A healthy page is reset to about:blank and re-queued;
/// a closed or unresettable page is disposed and its slot is freed so a replacement can be created.
/// </summary>
private async Task ReturnPageAsync(IPage page)
{
    if (!page.IsClosed && _browser.IsConnected)
    {
        try
        {
            // Reset page state
            await page.GoToAsync("about:blank");
            _pageQueue.Enqueue(page);
            _pageSemaphore.Release(); // Return to pool
            return;
        }
        catch (Exception ex)
        {
            log.LogError(ex, "Failed to reset page");
        }
    }

    // Cleanup closed or unresettable pages
    await DisposePageAsync(page);

    // Without this, every page retired here would permanently shrink the pool.
    ReleaseSlot(page.Browser);
}

/// <summary>
/// Releases one semaphore slot, but only when <paramref name="owner"/> is still the current,
/// connected browser. Slots belonging to a replaced or disconnected browser are dropped because
/// RestartBrowserAsync resets the semaphore and re-seeds the pool.
/// </summary>
private void ReleaseSlot(IBrowser owner)
{
    if (!ReferenceEquals(owner, _browser) || !owner.IsConnected)
    {
        return;
    }

    try
    {
        _pageSemaphore.Release();
    }
    catch (SemaphoreFullException ex)
    {
        // The semaphore is already at its maximum; there is no slot left to give back.
        log.LogWarning(ex, "Page semaphore already at capacity; slot not re-issued");
    }
}

/// <summary>Closes the page if it is still open (errors are logged, not thrown) and decrements the active-page count.</summary>
private async Task DisposePageAsync(IPage page)
{
    try
    {
        if (!page.IsClosed)
        {
            await page.CloseAsync();
        }
    }
    catch (Exception ex)
    {
        log.LogError(ex, "Error closing page");
    }
    finally
    {
        // Log the value produced by this decrement rather than re-reading the shared counter.
        var remaining = Interlocked.Decrement(ref _activePages);
        log.LogInformation("Page disposed. Active count: {Count}", remaining);
    }
}

/// <summary>
/// Page health validation: the page must be open and report a document.readyState of
/// "interactive" or "complete". Any exception while probing counts as not alive.
/// </summary>
private static async Task<bool> IsPageAliveAsync(IPage page)
{
    try
    {
        if (page.IsClosed)
        {
            return false;
        }

        var readyState = await page.EvaluateExpressionAsync<string>("document.readyState");
        return readyState is "interactive" or "complete";
    }
    catch
    {
        return false;
    }
}
3. Crash Recovery System
Purpose: Automatically recovers from browser crashes
Key Features: Full rebuild of the browser and page pool; while recovery runs, requests wait for a page (up to the 60-second checkout timeout)
/// <summary>
/// Rebuilds the browser and page pool: disposes queued pages, resets the page semaphore,
/// closes the previous browser if it is still connected, then launches a new browser and re-seeds the pool.
/// </summary>
private async Task RestartBrowserAsync()
{
    // Freeze operations during recovery
    await _browserGate.WaitAsync();
    try
    {
        // Cleanup dead pages
        while (_pageQueue.TryDequeue(out var stalePage))
        {
            await DisposePageAsync(stalePage);
        }

        // Reset semaphore. Wait(0) never blocks: checking CurrentCount and then awaiting WaitAsync()
        // could hang here forever (while holding the gate) if another caller took the last permit in between.
        while (_pageSemaphore.Wait(0))
        {
            // keep draining until no permit is immediately available
        }

        // Close zombie browser. Best effort: a failure here must not prevent the relaunch below.
        try
        {
            if (_browser?.IsConnected == true)
            {
                await _browser.CloseAsync();
            }
        }
        catch (Exception ex)
        {
            log.LogWarning(ex, "Error closing previous browser during restart");
        }

        // Reinitialize everything
        await StartBrowserAsync();
        await SeedPagePoolAsync();
    }
    finally
    {
        _browserGate.Release();
    }
}
4. PDF Rendering Core
Purpose: Converts HTML to PDF efficiently
Key Features: Network idle wait and retry logic
/// <summary>
/// Renders <paramref name="html"/> to a PDF using a pooled page. Makes up to three attempts,
/// with a short incremental delay between them, and rethrows the last error if all fail.
/// </summary>
/// <param name="html">HTML document to render.</param>
/// <returns>The PDF file contents.</returns>
/// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
public async Task<byte[]> RenderPdfAsync(string html)
{
    // Reject null up front instead of burning three attempts on input that can never succeed.
    ArgumentNullException.ThrowIfNull(html);

    const int maxRetries = 3;
    int attempt = 0;

    while (attempt < maxRetries)
    {
        IPage? page = null;
        try
        {
            page = await CheckoutPageAsync();

            // Wait for full page load
            await page.SetContentAsync(html, new NavigationOptions
            {
                WaitUntil = [WaitUntilNavigation.Networkidle0],
                Timeout = 60000
            });

            return await page.PdfDataAsync(DefaultPdfOptions);
        }
        catch (Exception ex)
        {
            attempt++;
            log.LogError(ex, "PDF generation failed (attempt {Try})", attempt);

            // Only close the page here. The finally block hands it to ReturnPageAsync, which
            // disposes closed pages; disposing here as well would count the page out of the pool twice.
            if (page is not null)
            {
                try
                {
                    if (!page.IsClosed)
                    {
                        await page.CloseAsync();
                    }
                }
                catch (Exception closeEx)
                {
                    log.LogWarning(closeEx, "Failed to close page after render error");
                }
            }

            if (attempt == maxRetries)
            {
                throw;
            }

            await Task.Delay(200 * attempt); // Incremental backoff
        }
        finally
        {
            if (page is not null)
            {
                await ReturnPageAsync(page);
            }
        }
    }

    throw new InvalidOperationException("PDF rendering failed");
}

// PDF configuration defaults
private static readonly PdfOptions DefaultPdfOptions = new()
{
    PrintBackground = true,
    Landscape = false,
    Format = PaperFormat.A4,
    MarginOptions = new MarginOptions
    {
        Top = "30px",
        Bottom = "30px",
        Left = "20px",
        Right = "20px"
    }
};
5. Health Checks and Integration
Purpose: Kubernetes-ready monitoring
Key Features: Browser responsiveness testing
/// <summary>
/// Reports whether the browser is connected and answers a version query within five seconds.
/// Any exception during the probe (including cancellation) yields <c>false</c>.
/// </summary>
public async Task<bool> IsHealthyAsync(CancellationToken ct = default)
{
    if (_browser is not { IsConnected: true })
    {
        return false;
    }

    try
    {
        // Quick connectivity test, bounded by both a timeout and the caller's token.
        var probe = _browser.GetVersionAsync().WaitAsync(TimeSpan.FromSeconds(5), ct);
        var reported = await probe;
        return reported is { Length: > 0 };
    }
    catch
    {
        return false;
    }
}

/// <summary>Health check that reports the renderer's browser status.</summary>
public class ChromiumHealthCheck(ChromiumPdfRenderer renderer) : IHealthCheck
{
    /// <inheritdoc />
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken ct)
    {
        var responsive = await renderer.IsHealthyAsync(ct);
        if (responsive)
        {
            return HealthCheckResult.Healthy("Browser responsive");
        }

        return HealthCheckResult.Unhealthy("Browser disconnected");
    }
}

/// <summary>Registration and startup helpers for the Chromium PDF renderer.</summary>
public static class ChromiumPdfServiceExtensions
{
    /// <summary>Registers the renderer and its health check as singletons.</summary>
    public static IServiceCollection AddChromiumPdfRenderer(this IServiceCollection services)
    {
        return services
            .AddSingleton<ChromiumPdfRenderer>()
            .AddSingleton<IHealthCheck, ChromiumHealthCheck>();
    }

    /// <summary>Adds <see cref="ChromiumHealthCheck"/> to the health-check pipeline under its type name.</summary>
    public static IHealthChecksBuilder AddChromiumHealthCheck(this IHealthChecksBuilder builder)
        => builder.AddCheck<ChromiumHealthCheck>(nameof(ChromiumHealthCheck));

    /// <summary>Resolves the renderer from the container and runs its initialization.</summary>
    public static async Task InitializeChromiumPdfRenderer(this IServiceProvider provider)
    {
        // Critical initialization
        await provider.GetRequiredService<ChromiumPdfRenderer>().InitializeAsync();
    }
}
6. Startup Configuration
Purpose: Ensures proper initialization sequence
Key Features: DI integration and health checks
var builder = WebApplication.CreateBuilder(args);

// Service registration
// Controllers must be registered here and mapped below, otherwise attribute-routed
// controller actions (such as a POST /pdf endpoint) are never reachable.
builder.Services.AddControllers();
builder.Services
    .AddChromiumPdfRenderer()
    .AddHealthChecks()
    .AddChromiumHealthCheck();

var app = builder.Build();

// Initialize before handling requests
await app.Services.InitializeChromiumPdfRenderer();

app.MapControllers();
app.MapHealthChecks("/healthz");
app.Run();
7. Usage in Controllers
/// <summary>HTTP endpoint that renders posted HTML to a PDF.</summary>
public class PdfController(ChromiumPdfRenderer pdf) : ControllerBase
{
    /// <summary>Renders the HTML in the request body and returns it as <c>application/pdf</c>.</summary>
    /// <param name="html">HTML document, bound from the request body.</param>
    /// <returns>The PDF file, or 400 Bad Request when no HTML was supplied.</returns>
    [HttpPost("/pdf")]
    public async Task<IActionResult> Render([FromBody] string html)
    {
        // A missing or empty body is a client error; fail fast instead of sending it to the renderer.
        if (string.IsNullOrEmpty(html))
        {
            return BadRequest("Request body must contain the HTML to render.");
        }

        var buffer = await pdf.RenderPdfAsync(html);
        return File(buffer, "application/pdf");
    }
}
Architectural Advantages
- Persistent Browser - Single instance handles all requests
- Page Pooling - Reusable pages eliminate creation overhead
- Automatic Recovery - Self-healing after crashes
- Concurrency Control - Semaphores prevent resource exhaustion
- Cloud Native - Built-in health checks for orchestration
Full Class Implementation
Class skeleton showing how the pieces fit together (method bodies are elided here; see the sections above):
namespace PdfGenerator
{
    /// <summary>
    /// Renders HTML to PDF using one long-lived browser and a bounded pool of reusable pages.
    /// Skeleton only: method bodies are elided.
    /// </summary>
    public class ChromiumPdfRenderer(ILogger<ChromiumPdfRenderer> log)
    {
        // Configuration and state management
        private const int PoolLimit = 20;                                         // upper bound on pages
        private IBrowser _browser = null!;                                        // assigned during initialization
        private readonly ConcurrentQueue<IPage> _pageQueue = new();               // idle pages
        private readonly SemaphoreSlim _pageSemaphore = new(0, PoolLimit);        // starts empty; filled as pages are pooled
        private readonly SemaphoreSlim _browserGate = new(1, 1);                  // mutual exclusion for start/restart
        private int _activePages;                                                 // updated via Interlocked
        private static readonly PdfOptions DefaultPdfOptions = new() { /* ... */ };

        // All methods from previous sections
        public async Task InitializeAsync() { /* ... */ }
        private async Task StartBrowserAsync() { /* ... */ }
        private async Task SeedPagePoolAsync() { /* ... */ }
        private async Task SpawnPageAsync() { /* ... */ }
        private async Task<IPage> CheckoutPageAsync() { /* ... */ }
        private async Task ReturnPageAsync(IPage page) { /* ... */ }
        private async Task DisposePageAsync(IPage page) { /* ... */ }
        private async Task RestartBrowserAsync() { /* ... */ }
        public async Task<byte[]> RenderPdfAsync(string html) { /* ... */ }

        // Signature matches the implementation in section 5: ChromiumHealthCheck passes its token through.
        public async Task<bool> IsHealthyAsync(CancellationToken ct = default) { /* ... */ }
    }

    /// <summary>Health check backed by <see cref="ChromiumPdfRenderer"/>.</summary>
    public class ChromiumHealthCheck(ChromiumPdfRenderer renderer) : IHealthCheck
    {
        public async Task<HealthCheckResult> CheckHealthAsync(
            HealthCheckContext context,
            CancellationToken ct) { /* ... */ }
    }

    /// <summary>Registration and startup helpers.</summary>
    public static class ChromiumPdfServiceExtensions
    {
        public static IServiceCollection AddChromiumPdfRenderer(this IServiceCollection services) { /* ... */ }
        public static IHealthChecksBuilder AddChromiumHealthCheck(this IHealthChecksBuilder builder) { /* ... */ }
        public static async Task InitializeChromiumPdfRenderer(this IServiceProvider provider) { /* ... */ }
    }
}
Key Takeaways
- Avoid per-request browsers - Use persistent instances
- Implement page pooling - Reuse pages with health checks
- Plan for failures - Build automatic recovery
- Control concurrency - Prevent resource exhaustion
- Monitor health - Essential for containerized environments
Top comments (0)