DEV Community

Xiao Ling
Xiao Ling

Posted on • Edited on • Originally published at dynamsoft.com

Passport MRZ Recognition with Dynamsoft C++ OCR SDK

MRZ stands for machine-readable zone. A passport's machine-readable zone contains the holder's name, nationality, passport number, date of birth, sex, and the passport's expiration date. This article shows how to use the Dynamsoft OCR SDK to recognize the MRZ and parse the corresponding information.

About Dynamsoft OCR SDK

Prerequisites

Windows

Linux

sudo apt install libopencv-dev cmake 
Enter fullscreen mode Exit fullscreen mode

Passport MRZ Recognition in C++

In the following sections, we first use the Dynamsoft OCR SDK to locate the machine-readable zone of a passport and recognize its text, and then extract each field from the recognized text according to the standard format of passport booklets.

Setting up CMake project

I strongly recommend installing the CMake extension for Visual Studio Code to create and debug a CMake project on both Windows and Linux.

Let us configure the include directories and link libraries for Dynamsoft OCR and OpenCV in CMakeLists.txt:

cmake_minimum_required (VERSION 2.6) project (mrz) MESSAGE( STATUS "PROJECT_NAME: " ${PROJECT_NAME} ) # Check platforms if (CMAKE_HOST_WIN32) set(WINDOWS 1) elseif(CMAKE_HOST_UNIX) set(LINUX 1) endif() # Add search path for include and lib files MESSAGE( STATUS "CPU architecture ${CMAKE_SYSTEM_PROCESSOR}" ) if(WINDOWS) link_directories("${PROJECT_SOURCE_DIR}/platform/windows/lib/") elseif(LINUX) link_directories("${PROJECT_SOURCE_DIR}/platform/linux/") endif() include_directories("${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/include/") # Add the executable find_package(OpenCV REQUIRED) add_executable(${PROJECT_NAME} mrzcv.cpp) if(WINDOWS) target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognitionx64" ${OpenCV_LIBS}) else() target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognition" ${OpenCV_LIBS}) endif() # Copy DLLs if(WINDOWS) add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory "${PROJECT_SOURCE_DIR}/platform/windows/bin/" $<TARGET_FILE_DIR:${PROJECT_NAME}>) endif() 
Enter fullscreen mode Exit fullscreen mode

The character model, trained with a deep neural network (DNN), is included in the C++ dev package. We need to copy the whole model folder to the output directory, and the template file as well.

# Copy template add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory "${PROJECT_SOURCE_DIR}/template/" $<TARGET_FILE_DIR:${PROJECT_NAME}>) # Copy model files add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory "${PROJECT_SOURCE_DIR}/CharacterModel" $<TARGET_FILE_DIR:${PROJECT_NAME}>/CharacterModel) 
Enter fullscreen mode Exit fullscreen mode

The online documentation can help you figure out how template parameters work.

Coding for MRZ detection and information parsing

Once the build configuration is done, we can move to the code part.

  1. Initialize the OCR object. A valid license key is required.

    CLabelRecognition dlr; dlr.InitLicense("LICENSE-KEY"); 
  2. Append a template file.

    int ret = dlr.AppendSettingsFromFile("template-file"); 

    Note: if the DirectoryPath configured in the template file is a relative path, you must place the template file in the same directory as the model folder.

    "CharacterModelArray" : [ { "DirectoryPath": "CharacterModel", "FilterFilePath": "", "Name": "NumberUppercase" } ], 
  3. Call the OCR recognition method. We can use OpenCV's built-in TickMeter class to measure the elapsed time; it keeps the timing code the same on Windows and Linux.

    TickMeter tm; tm.start(); errorCode = dlr.RecognizeByFile(pszImageFile, "locr"); tm.stop(); float costTime = tm.getTimeSec(); 
  4. Get the text recognition results, which include the coordinates of the text zone, the text lines, and the text strings.

    DLRResultArray* pDLRResults = NULL; dlr.GetAllDLRResults(&pDLRResults); if (pDLRResults != NULL) { int rCount = pDLRResults->resultsCount; printf("\r\nRecognized %d results\r\n", rCount); for (int ri = 0; ri < rCount; ++ri) { printf("\r\nResult %d :\r\n", ri); int startX = 50, startY = 50; DLRResult* result = pDLRResults->results[ri]; int lCount = result->lineResultsCount; for (int li = 0; li < lCount; ++li) { printf("Line result %d: %s\r\n", li, result->lineResults[li]->text); DLRPoint *points = result->lineResults[li]->location.points; printf("x1: %d, y1: %d, x2: %d, y2: %d, x3: %d, y3: %d, x4: %d, y4: %d\r\n", points[0].x, points[0].y, points[1].x, points[1].y, points[2].x, points[2].y, points[3].x, points[3].y); } } } else { printf("\r\nNo data detected.\r\n"); } dlr.FreeDLRResults(&pDLRResults); 
  5. Parse the MRZ string and extract the corresponding information.

    string line1 = result->lineResults[0]->text; string line2 = result->lineResults[1]->text; // https://en.wikipedia.org/wiki/Machine-readable_passport // Type string tmp = "Type: "; tmp.insert(tmp.length(), 1, line1[0]); printf("%s\r\n", tmp.c_str()); // Issuing country tmp = "Issuing country: "; line1.substr(2, 5); tmp += line1.substr(2, 3); printf("%s\r\n", tmp.c_str()); // Surname int index = 5; tmp = "Surname: "; for (; index < 44; index++) { if (line1[index] != '<') { tmp.insert(tmp.length(), 1, line1[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); // Given names tmp = "Given Names: "; index += 2; for (; index < 44; index++) { if (line1[index] != '<') { tmp.insert(tmp.length(), 1, line1[index]); } else { tmp.insert(tmp.length(), 1, ' '); } } printf("%s\r\n", tmp.c_str()); // Passport number tmp = "Passport number: "; index = 0; for (; index < 9; index++) { if (line2[index] != '<') { tmp.insert(tmp.length(), 1, line2[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); // Nationality tmp = "Nationality: "; tmp += line2.substr(10, 3); printf("%s\r\n", tmp.c_str()); // Date of birth tmp = line2.substr(13, 6); tmp.insert(2, "/"); tmp.insert(5, "/"); tmp = "Date of birth (YYMMDD): " + tmp; printf("%s\r\n", tmp.c_str()); // Sex tmp = "Sex: "; tmp.insert(tmp.length(), 1, line2[20]); printf("%s\r\n", tmp.c_str()); // Expiration date of passport tmp = line2.substr(21, 6); tmp.insert(2, "/"); tmp.insert(5, "/"); tmp = "Expiration date of passport (YYMMDD): " + tmp; printf("%s\r\n", tmp.c_str()); // Personal number if (line2[28] != '<') { tmp = "Personal number: "; for (index = 28; index < 42; index++) { if (line2[index] != '<') { tmp.insert(tmp.length(), 1, line2[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); } 

To make the program more user-friendly, we use OpenCV to display the image in a window and draw the relevant information on it.

line( ori, Point(x1, y1), Point(x2, y2), lineColor, thickness); line( ori, Point(x2, y2), Point(x3, y3), lineColor, thickness); line( ori, Point(x3, y3), Point(x4, y4), lineColor, thickness); line( ori, Point(x4, y4), Point(x1, y1), lineColor, thickness); drawText(ori, result->lineResults[li]->text, minX, minY - scale * 10); imshow("Passport MRZ Recognition", ori); 
Enter fullscreen mode Exit fullscreen mode

passport mrz ocr

A further improvement is to use the hconcat function to stitch the before and after images side by side for easier comparison.

hconcat(before, after, newMat); imshow("Comparison", newMat); 
Enter fullscreen mode Exit fullscreen mode

passport mrz recognition

Source Code

https://github.com/yushulx/cmake-cpp-barcode-qrcode/tree/main/examples/10.x/mrz

Top comments (0)