Tech Stack
- Frontend: HTML, CSS, Bootstrap
- Backend: Node.js, Express.js, cors, Multer, Tesseract.js, pdf-lib, pdf-poppler
- OCR Library: Tesseract.js
- PDF Libraries: pdf-lib, pdf-poppler (converts PDF pages to images for OCR)
Setup Instructions
1. Install Dependencies
Backend
Navigate to the backend directory and install the required packages:
cd backend
npm install
Frontend
You can use the same directory for serving static files or set up a separate frontend directory. No additional dependencies are needed for the provided HTML.
2. Configure the Backend
Ensure that you have the following packages installed:
npm install express cors multer pdf-lib tesseract.js pdf-poppler
3. Start the Backend Server
Run the server:
node server.js
4. Access the Application
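Open your web browser and navigate to http://localhost:3000. You should see the frontend UI where you can upload a PDF file. (Note: the server.js shown below only answers the root URL with "Hello World!", so to see the upload form, open index.html directly in the browser or serve it as a static file.)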
How It Works
- Frontend:
  - Users select a PDF file using the file input.
  - Upon form submission, the file is sent to the backend via a POST request.
  - A loading spinner is displayed while the PDF is being processed.
- Backend:
  - Receives the PDF file, converts each page to a PNG image, and performs OCR on the images using Tesseract.js.
  - Extracts text from the PDF images and returns the result to the frontend.
Frontend Code
Here is the index.html file used for the frontend:
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>PDF Upload and OCR</title>
  <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css" rel="stylesheet">
  <style>
    .container { max-width: 600px; margin: auto; padding: 20px; }
    .file-label { display: block; margin: 10px 0; }
    .submit-btn { margin-top: 10px; }
    /* Hidden by default; the submit handler toggles it while a request is in flight. */
    #loading { display: none; text-align: center; }
  </style>
</head>
<body>
  <div class="container">
    <h1 class="text-center">Upload a PDF for OCR</h1>

    <!-- The form is submitted by the script below via fetch, not by the browser. -->
    <form id="uploadForm" enctype="multipart/form-data">
      <div class="form-group">
        <label for="file" class="file-label">Select PDF</label>
        <input type="file" id="file" name="file" class="form-control-file" accept=".pdf" required>
      </div>
      <button type="submit" class="btn btn-primary submit-btn">Upload</button>
    </form>

    <!-- Spinner shown while the backend processes the upload. -->
    <div id="loading">
      <div class="spinner-border text-primary" role="status">
        <span class="sr-only">Loading...</span>
      </div>
      <p>Processing...</p>
    </div>

    <!-- Receives either the recognized text or an error message. -->
    <div id="result-container" class="mt-4">
      <h2>OCR Result</h2>
      <pre id="result"></pre>
    </div>
  </div>

  <script src="https://code.jquery.com/jquery-3.5.1.slim.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/@popperjs/core@2.5.4/dist/umd/popper.min.js"></script>
  <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"></script>
  <script>
    // Uploads the selected PDF to the backend's /ocr endpoint as multipart
    // form data (field name "file") and renders the returned text, or an
    // error message, into the #result element.
    document.getElementById('uploadForm').addEventListener('submit', async function (event) {
      event.preventDefault(); // Prevent default form submission

      const form = event.currentTarget;
      const submitButton = form.querySelector('button[type="submit"]');
      const loading = document.getElementById('loading');
      const resultElement = document.getElementById('result');

      const formData = new FormData();
      const fileInput = document.getElementById('file');
      const file = fileInput.files[0];
      formData.append("file", file);

      const requestOptions = {
        method: "POST",
        body: formData,
        redirect: "follow"
      };

      // Clear any earlier result so stale text is not shown next to the
      // spinner, and block re-submission so a double click cannot start
      // overlapping uploads.
      resultElement.textContent = '';
      submitButton.disabled = true;

      // Show loading animation
      loading.style.display = 'block';

      try {
        const response = await fetch("http://localhost:3000/ocr", requestOptions);
        if (response.ok) {
          const result = await response.json();
          // textContent (not innerHTML) so OCR output is never parsed as markup.
          resultElement.textContent = result.text;
        } else {
          resultElement.textContent = 'Error: ' + response.statusText;
        }
      } catch (error) {
        // Network failures and invalid JSON bodies both land here.
        resultElement.textContent = 'Error: ' + error.message;
      } finally {
        // Hide loading animation and allow another upload.
        loading.style.display = 'none';
        submitButton.disabled = false;
      }
    });
  </script>
</body>
</html>
Backend Code
Here is the server.js file used for the backend:
const express = require("express");
const cors = require("cors");
const multer = require("multer");
// NOTE(review): PDFDocument is not referenced anywhere in this script.
const { PDFDocument } = require("pdf-lib");
const Tesseract = require("tesseract.js");
const path = require("path");
const fs = require("fs");
const os = require("os");
const pdfPoppler = require("pdf-poppler");

const app = express();
const port = 3000;

// Enable CORS for all origins
app.use(cors());

// Uploads are kept in memory; the /ocr handler writes the buffer to disk
// itself because the PDF converter works on file paths.
const storage = multer.memoryStorage();
const upload = multer({ storage: storage });

/**
 * Renders every page of a PDF to a PNG file and returns the image paths.
 *
 * @param {string} pdfPath - Path of the PDF file to convert.
 * @param {string} [outputDir] - Directory that receives the PNG files.
 *   Defaults to "output" next to this script. Pass a directory unique to
 *   the caller when conversions may run concurrently, because every ".png"
 *   found in the directory is returned.
 * @returns {Promise<string[]>} Paths of the generated PNG files, ordered by
 *   file name with embedded numbers compared numerically.
 * @throws Rethrows any error raised by the conversion after logging it.
 */
async function convertPdfToImages(
  pdfPath,
  outputDir = path.join(__dirname, "output")
) {
  // recursive: true makes this a no-op when the directory already exists.
  fs.mkdirSync(outputDir, { recursive: true });

  const options = {
    format: "png",
    out_dir: outputDir,
    out_prefix: "page",
    page_range: "1-",
  };

  try {
    await pdfPoppler.convert(pdfPath, options);
    return fs
      .readdirSync(outputDir)
      .filter((file) => file.endsWith(".png"))
      // Directory listing order is not guaranteed; sort so the OCR text is
      // assembled in page order (numeric so "page-2" precedes "page-10").
      .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))
      .map((file) => path.join(outputDir, file));
  } catch (error) {
    console.error("Error converting PDF to images:", error);
    throw error;
  }
}

// Sample route
app.get("/", (req, res) => {
  res.send("Hello World!");
});

// POST route for OCR: expects a multipart upload in the "file" field and
// responds with JSON { text } containing the recognized text of all pages.
app.post("/ocr", upload.single("file"), async (req, res) => {
  if (!req.file) {
    return res.status(400).send("No file uploaded.");
  }

  // Each request gets its own scratch directory so concurrent uploads can
  // never overwrite each other's PDF or pick up each other's page images.
  let workDir;
  try {
    workDir = fs.mkdtempSync(path.join(os.tmpdir(), "ocr-"));
    const tempPdfPath = path.join(workDir, "input.pdf");
    fs.writeFileSync(tempPdfPath, req.file.buffer);

    const imagePaths = await convertPdfToImages(
      tempPdfPath,
      path.join(workDir, "pages")
    );

    const texts = await Promise.all(
      imagePaths.map(async (imagePath) => {
        const {
          data: { text },
        } = await Tesseract.recognize(imagePath, "eng", {
          logger: (info) => console.log(info),
        });
        return text;
      })
    );

    res.json({ text: texts.join("\n") });
  } catch (error) {
    console.error(error);
    res.status(500).send("Error processing file.");
  } finally {
    // Remove the scratch directory on success and failure alike, so a
    // failed conversion or OCR run does not leave files behind.
    if (workDir) {
      fs.rmSync(workDir, { recursive: true, force: true });
    }
  }
});

// 404 error handler
app.use((req, res) => {
  res.status(404).send("Not Found");
});

app.listen(port, () => {
  console.log(`Server running at http://localhost:${port}`);
});
Top comments (0)