Recently I was developing a feature that required getting the first page of a PDF (stored on S3) and converting it to an image. I searched the internet but couldn't find a to-the-point guide on how to do this in AWS Lambda, so here I am sharing my workaround.
Things you need to do before moving onto the code section
- Give s3 permissions to the lambda function.
- Add a Ghostscript layer: https://github.com/shelfio/ghostscript-lambda-layer
Here are the steps to be followed (I will write steps for code only)
1 => Getting a file from S3 and saving it temporarily.
/**
 * Downloads an object from S3 and saves it under /tmp so that local tools can read it.
 *
 * Never rejects: every failure is logged and reported by resolving with null.
 *
 * @param {string} bucket - Name of the S3 bucket.
 * @param {string} objectname - Key of the object; also used as the relative path under /tmp.
 * @returns {Promise<string|null>} Path of the written file, or null when the download or write failed.
 */
function getFile(bucket, objectname) {
  return new Promise((resolve) => {
    const params = { Bucket: bucket, Key: objectname };
    s3.getObject(params, (err, data) => {
      if (err) {
        console.log(err);
        resolve(null);
        // Stop here: without this return the code below would read data.Body
        // from a failed request and throw inside the callback.
        return;
      }

      const name = `/tmp/${objectname}`;
      // Keys may contain "/" (prefixes); create the matching directories first,
      // because fs.writeFile does not create missing parent directories.
      const directory = name.slice(0, name.lastIndexOf("/"));
      fs.mkdir(directory, { recursive: true }, (mkdirErr) => {
        if (mkdirErr) {
          console.log(mkdirErr);
          resolve(null);
          return;
        }
        fs.writeFile(name, data.Body, (writeErr) => {
          if (writeErr) {
            console.log(writeErr);
            resolve(null);
            return;
          }
          resolve(name);
        });
      });
    });
  });
}
// filepath is the /tmp path of the saved PDF, or null when getFile could not download or write it.
const filepath = await getFile(bucket, key);
2 => Create a helper file for the conversion code and name it pdf2Img.js.
This code converts the temporary PDF file to a JPEG image. It is inspired by pdf2png, which generates a PNG image.
const exec = require("child_process").exec; const fs = require("fs"); const tmp = require("tmp"); // ghostscript executables path let projectPath = __dirname.split("\\"); projectPath.pop(); projectPath = projectPath.join("\\"); exports.ghostscriptPath = projectPath + "\\executables\\ghostScript"; exports.convert = (pdfPath, options) => { return new Promise((resolve, reject) => { if (!options.useLocalGS) { process.env.Path += ";" + exports.ghostscriptPath; } options.quality = options.quality || 100; // get temporary filepath tmp.file({ postfix: ".jpeg" }, function (err, imageFilepath, fd) { if (err) { resolve({ success: false, error: "Error getting second temporary filepath: " + err, }); return; } exec( "gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=jpeg -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" + options.quality + " -dFirstPage=1 -dLastPage=1 -sOutputFile=" + imageFilepath + " " + pdfPath, (error, stdout, stderr) => { if (error !== null) { resolve({ success: false, error: "Error converting pdf to png: " + error, }); return; } const img = fs.readFileSync(imageFilepath); resolve({ success: true, data: img }); } ); }); }); };
To generate a jpeg, use the below command in exec
"gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=jpeg -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" + options.quality + " -dFirstPage=1 -dLastPage=1 -sOutputFile=" + imageFilepath + " " + pdfPath
To generate png use the below command in exec
"gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" + options.quality + " -dFirstPage=1 -dLastPage=1 -sOutputFile=" + imageFilepath + " " + pdfPath
You can find more details about Ghostscript options here: https://www.ghostscript.com/doc/current/Use.htm
3 => Use the helper function in the index file. Also set ghostscriptPath to "/opt/bin/gs"
// Load the conversion helper and override its exported ghostscriptPath with the path used in this walkthrough.
const pdf2Img = require("./pdf2Img"); pdf2Img.ghostscriptPath = "/opt/bin/gs";
Create a function that will execute the conversion code:
/**
 * Converts the PDF at pdfPath to a JPEG via pdf2Img.convert and describes the result.
 *
 * Never throws: every outcome is returned as an object with a `success` flag,
 * so callers can safely inspect the result.
 *
 * @param {string} pdfPath - Path of the PDF file to convert.
 * @returns {Promise<{success: boolean, contentType?: string, data?: *, error?: string}>}
 *   On success: `contentType` ("image/jpeg") and `data` (the image returned by the converter).
 *   On failure: `error` with a description.
 */
async function pdfToImage(pdfPath) {
  try {
    const response = await pdf2Img.convert(pdfPath, {});
    if (!response.success) {
      console.log("Error in pdfToImage", response.error);
      return response;
    }
    return {
      success: true,
      contentType: "image/jpeg",
      data: response.data,
    };
  } catch (e) {
    console.log("Error in pdfToImage", e.message);
    // Return a failure object instead of undefined so that callers reading
    // properties of the result do not hit a TypeError.
    return { success: false, error: e.message };
  }
}
// Convert the downloaded PDF; on success the result carries contentType and data for the upload step.
const pdfToImageRes = await pdfToImage(filepath);
4 => Upload the converted image to the bucket.
/**
 * Stores data in S3 under `${somePath}/${objectname}` with the given content type.
 *
 * Upload errors are logged and reported by resolving with null.
 *
 * @param {string} bucket - Name of the destination bucket.
 * @param {string} objectname - Object name appended to somePath to form the key.
 * @param {string} contentType - Value sent as the object's ContentType.
 * @param {*} data - Payload sent as the object's Body.
 * @returns {Promise<true|null>} true when the upload succeeded, null when it failed.
 */
function uploadFile(bucket, objectname, contentType, data) {
  return new Promise((resolve) => {
    const request = {
      Bucket: bucket,
      Key: `${somePath}/${objectname}`,
      Body: data,
      ContentType: contentType,
    };

    s3.putObject(request, (uploadError) => {
      if (uploadError) {
        console.log(uploadError);
        resolve(null);
      } else {
        resolve(true);
      }
    });
  });
}
// Upload the converted image; responseUpload is true on success and null when the upload failed.
const responseUpload = await uploadFile( bucket, imageName, pdfToImageRes.contentType, pdfToImageRes.data );
That's it!
Top comments (0)