Customized dataset #3087

Vish19-code · 2025-03-04T14:39:30Z

Vish19-code
Mar 4, 2025

hello,

i am using customized dataset and python tools/create_data.py custom --root-path ./data/custom --out-dir ./data/custom --extra-tag custom
for dataset i am using this https://mmdetection3d.readthedocs.io/en/dev-1.x/advanced_guides/customize_dataset.html for multimodel_3dobject detction

use this for train before that i register in creat_data_py
also add in Custom_converter.py and utils.py but i have a problem can you help me how i have to solve

i will write my file

Copyright (c) OpenMMLab. All rights reserved.

from collections import OrderedDict
from pathlib import Path

import mmcv
import mmengine
import numpy as np

from mmdet3d.structures.ops import box_np_ops
from .custom_data_utils import get_custom_image_info
from mmengine import track_parallel_progress, track_iter_progress, dump, load

kitti_categories = ('Pedestrian', 'Cyclist', 'Car')

def convert_to_custom_info_version2(info):
"""Convert custom dataset info format to a KITTI-like format."""

if 'image' not in info or 'calib' not in info or 'point_cloud' not in info: info['image'] = { 'image_shape_0': info.get('img_shape_0', None), # First camera view 'image_idx': info.get('image_idx', None), 'image_path_0': f'images/images_0/{int(info["image_idx"]):06d}.png', 'image_shape_1': info.get('img_shape_1', None), # Second camera view 'image_path_1': f'images/images_1/{int(info["image_idx"]):06d}.png', } info['calib'] = { 'P0': info['calib/P0'], # Intrinsic matrix for Camera 0 'P1': info['calib/P1'], # Intrinsic matrix for Camera 1 'lidar2cam0': info['calib/lidar2cam0'], # LiDAR to Camera 0 'lidar2cam1': info['calib/lidar2cam1'], # LiDAR to Camera 1 'lidar2world': info['calib/lidar2world'], # Global LiDAR transformation } info['point_cloud'] = { 'points_path': f'points/{int(info["image_idx"]):06d}.bin', } # Load labels labels_file = f"labels/{info['image_idx']:06d}.txt" with open(labels_file, 'r') as f: lines = f.readlines() annos = {'name': [], 'gt_boxes': [], 'gt_classes': []} for line in lines: parts = line.strip().split() x, y, z, dx, dy, dz, yaw = map(float, parts[:7]) category_name = parts[7] # Define categories custom_categories = ('Pedestrian', 'Cyclist', 'Car') if category_name in custom_categories: class_id = custom_categories.index(category_name) else: continue # Skip unknown categories annos['name'].append(category_name) annos['gt_boxes'].append([x, y, z, dx, dy, dz, yaw]) annos['gt_classes'].append(class_id) # Convert lists to NumPy arrays annos['gt_boxes'] = np.array(annos['gt_boxes'], dtype=np.float32) if annos['gt_boxes'] else np.zeros((0, 7), dtype=np.float32) annos['gt_classes'] = np.array(annos['gt_classes'], dtype=np.int32) if annos['gt_classes'] else np.zeros((0,), dtype=np.int32) info['annos'] = annos

def _read_imageset_file(path):
with open(path, 'r') as f:
lines = f.readlines()
return [int(line) for line in lines]

class _NumPointsInGTCalculater:
"""Calculate the number of points inside ground truth boxes for the custom dataset.

Args: data_path (str): Path to the dataset root. relative_path (bool): Whether to use relative paths for files. remove_outside (bool, optional): Whether to remove points outside the image view. Default: True. num_features (int, optional): Number of features per point (x, y, z, intensity). Default: 4. num_worker (int, optional): Number of parallel workers for processing. Default: 8. """ def __init__(self, data_path, relative_path, remove_outside=True, num_features=4, num_worker=8) -> None: self.data_path = data_path self.relative_path = relative_path self.remove_outside = remove_outside self.num_features = num_features self.num_worker = num_worker def calculate_single(self, info): """Calculate the number of points inside GT boxes for a single frame.""" pc_info = info['point_cloud'] image_info = info['image'] calib = info['calib'] # Read the LiDAR point cloud data if self.relative_path: v_path = str(Path(self.data_path) / pc_info['points_path']) else: v_path = pc_info['points_path'] points_v = np.fromfile( v_path, dtype=np.float32, count=-1).reshape([-1, self.num_features]) # Select camera transformations P0 = calib['P0'] # Intrinsic matrix for Camera 0 P1 = calib['P1'] # Intrinsic matrix for Camera 1 lidar2cam0 = calib['lidar2cam0'] # LiDAR to Camera 0 transformation lidar2cam1 = calib['lidar2cam1'] # LiDAR to Camera 1 transformation # Remove points that are outside of the camera view (if enabled) if self.remove_outside: points_v = box_np_ops.remove_outside_points( points_v, lidar2cam0, P0, image_info['image_shape_0']) points_v = box_np_ops.remove_outside_points( points_v, lidar2cam1, P1, image_info['image_shape_1']) # Process annotations (ground truth bounding boxes) annos = info['annos'] gt_boxes = annos['gt_boxes'] # Convert GT boxes to LiDAR coordinate system gt_boxes_lidar = box_np_ops.box_camera_to_lidar(gt_boxes, lidar2cam0) # Compute number of LiDAR points inside each GT box indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar) num_points_in_gt = indices.sum(0) annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32) return info def calculate(self, infos): """Process all frames in parallel to compute points inside GT boxes.""" ret_infos = mmengine.track_parallel_progress(self.calculate_single, infos, self.num_worker) for i, ret_info in enumerate(ret_infos): infos[i] = ret_info

def _calculate_num_points_in_gt(data_path,
infos,
relative_path,
remove_outside=True,
num_features=4):
"""Calculate number of points inside ground truth boxes for the custom dataset."""

for info in mmengine.track_iter_progress(infos): pc_info = info['point_cloud'] image_info = info['image'] calib = info['calib'] # Read the LiDAR point cloud data if relative_path: v_path = str(Path(data_path) / pc_info['points_path']) else: v_path = pc_info['points_path'] points_v = np.fromfile( v_path, dtype=np.float32, count=-1).reshape([-1, num_features]) # Select Camera Intrinsic and LiDAR-to-Camera Matrices P0 = calib['P0'] # Camera 0 intrinsic matrix P1 = calib['P1'] # Camera 1 intrinsic matrix lidar2cam0 = calib['lidar2cam0'] # LiDAR to Camera 0 lidar2cam1 = calib['lidar2cam1'] # LiDAR to Camera 1 # Remove points outside the image field of view if remove_outside: points_v = box_np_ops.remove_outside_points( points_v, lidar2cam0, P0, image_info['image_shape_0']) points_v = box_np_ops.remove_outside_points( points_v, lidar2cam1, P1, image_info['image_shape_1']) # Load Annotations (Ground Truth Bounding Boxes) annos = info['annos'] gt_boxes = annos['gt_boxes'] # Convert GT boxes from Camera to LiDAR coordinate system gt_boxes_lidar = box_np_ops.box_camera_to_lidar(gt_boxes, lidar2cam0) # Compute number of LiDAR points inside each GT box indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar) num_points_in_gt = indices.sum(0) annos['num_points_in_gt'] = num_points_in_gt.astype(np.int32)

def create_custom_info_file(data_path,
pkl_prefix='custom',
save_path=None,
relative_path=True):
"""Create info file for the custom dataset in KITTI format.

Given the raw data, generate its related info file in `.pkl` format. Args: data_path (str): Path to the dataset root. pkl_prefix (str, optional): Prefix of the info file to be generated. Default: 'custom'. save_path (str, optional): Path to save the info file. Default: None. relative_path (bool, optional): Whether to use relative paths. Default: True. """ imageset_folder = Path(data_path) / 'ImageSets' train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt')) val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt')) test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt')) print('Generating info files. This may take several minutes...') if save_path is None: save_path = Path(data_path ='./data/custom') else: save_path = Path(save_path = False) # Process training data custom_infos_train = get_custom_image_info( data_path, training=True, points=True, calib=True, image_ids=train_img_ids, relative_path=relative_path) _calculate_num_points_in_gt(data_path, custom_infos_train, relative_path) filename = save_path / f'{pkl_prefix}_infos_train.pkl' print(f'Custom dataset info train file is saved to {filename}') mmengine.dump(custom_infos_train, filename) # Process validation data custom_infos_val = get_custom_image_info( data_path, training=True, points=True, calib=True, image_ids=val_img_ids, relative_path=relative_path) _calculate_num_points_in_gt(data_path, custom_infos_val, relative_path) filename = save_path / f'{pkl_prefix}_infos_val.pkl' print(f'Custom dataset info val file is saved to {filename}') mmengine.dump(custom_infos_val, filename) # Combine train and validation filename = save_path / f'{pkl_prefix}_infos_trainval.pkl' print(f'Custom dataset info trainval file is saved to {filename}') mmengine.dump(custom_infos_train + custom_infos_val, filename) # Process test data custom_infos_test = get_custom_image_info( data_path, training=False, points=True, calib=True, image_ids=test_img_ids, relative_path=relative_path) filename = save_path / f'{pkl_prefix}_infos_test.pkl' print(f'Custom dataset info test file is saved to {filename}') mmengine.dump(custom_infos_test, filename)

def create_waymo_info_file(data_path,
pkl_prefix='waymo',
save_path=None,
relative_path=True,
max_sweeps=5,
workers=8):
"""Create info file of waymo dataset.

Given the raw data, generate its related info file in pkl format. Args: data_path (str): Path of the data root. pkl_prefix (str, optional): Prefix of the info file to be generated. Default: 'waymo'. save_path (str, optional): Path to save the info file. Default: None. relative_path (bool, optional): Whether to use relative path. Default: True. max_sweeps (int, optional): Max sweeps before the detection frame to be used. Default: 5. """ imageset_folder = Path(data_path) / 'ImageSets' train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt')) val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt')) test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt')) print('Generate info. this may take several minutes.') if save_path is None: save_path = Path(data_path) else: save_path = Path(save_path) waymo_infos_gatherer_trainval = WaymoInfoGatherer( data_path, training=True, velodyne=True, calib=True, pose=True, relative_path=relative_path, max_sweeps=max_sweeps, num_worker=workers) waymo_infos_gatherer_test = WaymoInfoGatherer( data_path, training=False, label_info=False, velodyne=True, calib=True, pose=True, relative_path=relative_path, max_sweeps=max_sweeps, num_worker=workers) num_points_in_gt_calculater = _NumPointsInGTCalculater( data_path, relative_path, num_features=6, remove_outside=False, num_worker=workers) waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids) num_points_in_gt_calculater.calculate(waymo_infos_train) filename = save_path / f'{pkl_prefix}_infos_train.pkl' print(f'Waymo info train file is saved to {filename}') mmengine.dump(waymo_infos_train, filename) waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids) num_points_in_gt_calculater.calculate(waymo_infos_val) filename = save_path / f'{pkl_prefix}_infos_val.pkl' print(f'Waymo info val file is saved to {filename}') mmengine.dump(waymo_infos_val, filename) filename = save_path / f'{pkl_prefix}_infos_trainval.pkl' print(f'Waymo info trainval file is saved to {filename}') mmengine.dump(waymo_infos_train + waymo_infos_val, filename) waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids) filename = save_path / f'{pkl_prefix}_infos_test.pkl' print(f'Waymo info test file is saved to {filename}') mmengine.dump(waymo_infos_test, filename)

def _create_reduced_point_cloud(data_path,
info_path,
save_path=None,
back=False,
num_features=4,
front_camera_id=0):
"""Create reduced point clouds for given info.

Args: data_path (str): Path of original data. info_path (str): Path of data info. save_path (str, optional): Path to save reduced point cloud data. Default: None. back (bool, optional): Whether to flip the points to back. Default: False. num_features (int, optional): Number of point features. Default: 4. front_camera_id (int, optional): The referenced/front camera ID. Default: 0 (Use Camera 0 as reference). """ custom_infos = mmengine.load(info_path) for info in mmengine.track_iter_progress(custom_infos): pc_info = info['points'] image_info = info['image'] calib = info['calib'] # Load LiDAR point cloud data v_path = Path(data_path) / pc_info['points_path'] points_v = np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, num_features]) # Select Camera Intrinsic & Extrinsic Matrices if front_camera_id == 0: P = calib['P0'] # Intrinsic matrix for Camera 0 lidar2cam = calib['lidar2cam0'] # LiDAR → Camera 0 transformation image_shape = image_info['image_shape_0'] elif front_camera_id == 1: P = calib['P1'] # Intrinsic matrix for Camera 1 lidar2cam = calib['lidar2cam1'] # LiDAR → Camera 1 transformation image_shape = image_info['image_shape_1'] else: raise ValueError("Invalid front_camera_id! Use 0 for Camera 0 or 1 for Camera 1.") # Remove points that are outside of the selected camera's view points_v = box_np_ops.remove_outside_points(points_v, lidar2cam, P, image_shape) # Flip LiDAR points if 'back' is enabled if back: points_v[:, 0] = -points_v[:, 0] # Save the reduced point cloud if save_path is None: save_dir = v_path.parent.parent / (v_path.parent.stem + '_reduced') save_dir.mkdir(parents=True, exist_ok=True) save_filename = save_dir / v_path.name if back: save_filename = save_filename.with_name(save_filename.name + '_back') else: save_filename = Path(save_path) / v_path.name if back: save_filename = save_filename.with_name(save_filename.name + '_back') with open(save_filename, 'wb') as f: points_v.tofile(f)

def create_reduced_point_cloud(data_path,
pkl_prefix='custom',
train_info_path=None,
val_info_path=None,
test_info_path=None,
save_path=None,
with_back=False):
"""Create reduced point clouds for training/validation/testing.

Args: data_path (str): Path of original data. pkl_prefix (str): Prefix of info files. train_info_path (str, optional): Path of training set info. Default: None. val_info_path (str, optional): Path of validation set info. Default: None. test_info_path (str, optional): Path of test set info. Default: None. save_path (str, optional): Path to save reduced point cloud data. Default: None. with_back (bool, optional): Whether to flip the points to back. Default: False. """ if train_info_path is None: train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl' if val_info_path is None: val_info_path = Path(data_path) / f'{pkl_prefix}_infos_val.pkl' if test_info_path is None: test_info_path = Path(data_path) / f'{pkl_prefix}_infos_test.pkl' print('Creating reduced point cloud for training set...') _create_reduced_point_cloud(data_path, train_info_path, save_path) print('Creating reduced point cloud for validation set...') _create_reduced_point_cloud(data_path, val_info_path, save_path) print('Creating reduced point cloud for testing set...') _create_reduced_point_cloud(data_path, test_info_path, save_path) if with_back: _create_reduced_point_cloud(data_path, train_info_path, save_path, back=True) _create_reduced_point_cloud(data_path, val_info_path, save_path, back=True) _create_reduced_point_cloud(data_path, test_info_path, save_path, back=True)

def generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.

Args: ann_rec (dict): Original 3D annotation record. x1 (float): Minimum x coordinate of the bounding box. y1 (float): Minimum y coordinate of the bounding box. x2 (float): Maximum x coordinate of the bounding box. y2 (float): Maximum y coordinate of the bounding box. sample_data_token (str): Unique identifier for the sample data. filename (str): The corresponding image file where the annotation is present. Returns: dict: A sample 2D annotation record with: - file_name (str): Image file name. - image_id (str): Sample data token. - area (float): Area of the 2D bounding box. - category_name (str): Object category name. - category_id (int): Numeric category ID. - bbox (list[float]): Bounding box coordinates [x_min, y_min, width, height]. - iscrowd (int): Crowd indicator (0 for normal objects). """ repro_rec = OrderedDict() repro_rec['sample_data_token'] = sample_data_token coco_rec = {} # 🔹 Custom categories (update based on your dataset labels) custom_categories = ('Car', 'Pedestrian', 'Cyclist', 'Truck', 'Bus', 'Motorcycle') key_mapping = { 'name': 'category_name', 'num_points_in_gt': 'num_lidar_pts', 'sample_annotation_token': 'sample_annotation_token', 'sample_data_token': 'sample_data_token', } # Map annotation keys for key, value in ann_rec.items(): if key in key_mapping.keys(): repro_rec[key_mapping[key]] = value repro_rec['bbox_corners'] = [x1, y1, x2, y2] repro_rec['filename'] = filename # Create COCO-style record coco_rec['file_name'] = filename coco_rec['image_id'] = sample_data_token coco_rec['area'] = (y2 - y1) * (x2 - y1) # 🔹 Ensure category exists in custom categories category_name = repro_rec['category_name'] if category_name not in custom_categories: print(f"Warning: Unknown category '{category_name}' in dataset.") return None # Skip if the category is not in the defined set coco_rec['category_name'] = category_name coco_rec['category_id'] = custom_categories.index(category_name) coco_rec['bbox'] = [x1, y1, x2 - x1, y2 - y1] # Convert (x_min, y_min, x_max, y_max) to (x, y, width, height) coco_rec['iscrowd'] = 0 # Assume no crowd annotations return coco_rec

and my utils file

from collections import OrderedDict
from concurrent import futures as futures
from os import path as osp
from pathlib import Path

import mmengine
import numpy as np
from PIL import Image
from skimage import io

def get_image_index_str(img_idx, use_prefix_id=False):
if use_prefix_id:
return '{:07d}'.format(img_idx)
else:
return '{:06d}'.format(img_idx)

def get_custom_info_path(idx,
prefix,
sub_folder,
file_tail,
relative_path=True,
exist_check=True):
"""Generate file paths for images, LiDAR, and calibration data."""
img_idx_str = get_image_index_str(idx) + file_tail
prefix = Path(prefix)
file_path = Path(sub_folder) / img_idx_str

if exist_check and not (prefix / file_path).exists(): raise ValueError(f'File does not exist: {file_path}') return str(file_path) if relative_path else str(prefix / file_path)

def get_image_paths(idx, prefix, relative_path=True, exist_check=True):
"""Get paths for both camera images."""
return {
'image_0': get_custom_info_path(idx, prefix, 'images/images_0', '.png', relative_path, exist_check),
'image_1': get_custom_info_path(idx, prefix, 'images/images_1', '.png', relative_path, exist_check)
}

def get_label_path(idx, prefix, relative_path=True, exist_check=True):
"""Get path for label file (ground truth annotations)."""
return get_custom_info_path(idx, prefix, 'labels', '.txt', relative_path, exist_check)

def get_lidar_path(idx, prefix, relative_path=True, exist_check=True):
"""Get path for LiDAR point cloud file (.bin)."""
return get_custom_info_path(idx, prefix, 'points', '.bin', relative_path, exist_check)

def get_calib_path(idx, prefix, relative_path=True, exist_check=True):
"""Get path for calibration file."""
return get_custom_info_path(idx, prefix, 'calibs', '.txt', relative_path, exist_check)

def get_custom_label_anno(label_path):
"""
Parse annotation file for custom dataset.

Args: label_path (str): Path to the annotation file (.txt). Returns: dict: Parsed annotations with: - name (list[str]): Object categories. - gt_boxes (np.ndarray): 3D bounding boxes (x, y, z, dx, dy, dz, yaw). - gt_classes (np.ndarray): Class indices for each object. """ annotations = { 'name': [], 'gt_boxes': [], 'gt_classes': [] } with open(label_path, 'r') as f: lines = f.readlines() # custom dataset object categories custom_categories = ('Pedestrian', 'Cyclist', 'Car', 'Truck', 'Bus', 'Motorcycle') for line in lines: parts = line.strip().split() if len(parts) < 8: print(f"Warning: Skipping malformed line in {label_path}: {line}") continue # Skip lines that do not have enough data # Extract values x, y, z, dx, dy, dz, yaw = map(float, parts[:7]) category_name = parts[7] # Assign class index (if category exists) if category_name in custom_categories: class_id = custom_categories.index(category_name) else: print(f"Warning: Unknown category '{category_name}' in {label_path}. Skipping...") continue # Skip unknown classes # Store parsed data annotations['name'].append(category_name) annotations['gt_boxes'].append([x, y, z, dx, dy, dz, yaw]) annotations['gt_classes'].append(class_id) # Convert lists to NumPy arrays annotations['gt_boxes'] = np.array(annotations['gt_boxes'], dtype=np.float32) if annotations['gt_boxes'] else np.zeros((0, 7), dtype=np.float32) annotations['gt_classes'] = np.array(annotations['gt_classes'], dtype=np.int32) if annotations['gt_classes'] else np.zeros((0,), dtype=np.int32) return annotations

def _extend_matrix(mat):
mat = np.concatenate([mat, np.array([[0., 0., 0., 1.]])], axis=0)
return mat

def get_custom_image_info(data_path,
training=True,
label_info=True,
lidar=True,
calib=True,
image_ids=None,
relative_path=True):
"""
Extracts image, LiDAR, and calibration information for the custom dataset.

Args: data_path (str): Root dataset path. training (bool): True if loading training set. label_info (bool): Load label annotations. lidar (bool): Load LiDAR data. calib (bool): Load calibration data. image_ids (list): List of image indices. relative_path (bool): Use relative paths. Returns: list: List of dictionary info for each sample. """ root_path = Path(data_path) if image_ids is None: image_ids = sorted([int(p.stem) for p in (root_path / "labels").glob("*.txt")]) def process_sample(idx): info = {} image_info = { 'image_idx': idx, 'image_path_0': f'images/images_0/{idx:06d}.png', 'image_path_1': f'images/images_1/{idx:06d}.png', } lidar_info = {'points_path': f'points/{idx:06d}.bin'} calib_info = {} if label_info: label_path = root_path / "labels" / f"{idx:06d}.txt" annos = {'name': [], 'gt_boxes': [], 'gt_classes': []} with open(label_path, 'r') as f: lines = f.readlines() for line in lines: parts = line.strip().split() x, y, z, dx, dy, dz, yaw = map(float, parts[:7]) category_name = parts[7] # Define custom categories custom_categories = ('Pedestrian', 'Cyclist', 'Car') if category_name in custom_categories: class_id = custom_categories.index(category_name) else: continue # Skip unknown categories annos['name'].append(category_name) annos['gt_boxes'].append([x, y, z, dx, dy, dz, yaw]) annos['gt_classes'].append(class_id) annos['gt_boxes'] = np.array(annos['gt_boxes'], dtype=np.float32) if annos['gt_boxes'] else np.zeros((0, 7), dtype=np.float32) annos['gt_classes'] = np.array(annos['gt_classes'], dtype=np.int32) if annos['gt_classes'] else np.zeros((0,), dtype=np.int32) info['annos'] = annos if calib: calib_path = root_path / "calibs" / f"{idx:06d}.txt" with open(calib_path, 'r') as f: lines = f.readlines() calib_info = { 'P0': np.array([float(v) for v in lines[0].split()]).reshape(3, 4), 'P1': np.array([float(v) for v in lines[1].split()]).reshape(3, 4), 'lidar2cam0': np.array([float(v) for v in lines[2].split()]).reshape(3, 4), 'lidar2cam1': np.array([float(v) for v in lines[3].split()]).reshape(3, 4), 'lidar2world': np.array([float(v) for v in lines[4].split()]).reshape(4, 4), } info['calib'] = calib_info info['image'] = image_info info['point_cloud'] = lidar_info return info return [process_sample(idx) for idx in image_ids]

class customInfoGatherer:
"""
Parallel version of custom dataset information gathering.

Expected format: { "image": { "image_idx": int, "image_path_0": str, "image_path_1": str, "image_shape_0": [H, W], "image_shape_1": [H, W] }, "point_cloud": { "points_path": str, "num_features": int }, "calib": { "P0": np.array, # Camera 0 intrinsic matrix "P1": np.array, # Camera 1 intrinsic matrix "lidar2cam0": np.array, # LiDAR → Camera 0 transform "lidar2cam1": np.array, # LiDAR → Camera 1 transform }, "annos": { "name": list[str], "gt_boxes": np.array, "gt_classes": np.array } } """ def __init__(self, path, training=True, label_info=True, lidar=True, calib=True, num_worker=8, relative_path=True): self.path = path self.training = training self.label_info = label_info self.lidar = lidar self.calib = calib self.num_worker = num_worker self.relative_path = relative_path def gather_single(self, idx): root_path = Path(self.path) info = {} # Image Information image_info = { "image_idx": idx, "image_path_0": f"images/images_0/{idx:06d}.png", "image_path_1": f"images/images_1/{idx:06d}.png" } # Load Image Size img_path_0 = root_path / image_info["image_path_0"] img_path_1 = root_path / image_info["image_path_1"] if img_path_0.exists() and img_path_1.exists(): image_info["image_shape_0"] = np.array(Image.open(img_path_0).size[::-1], dtype=np.int32) image_info["image_shape_1"] = np.array(Image.open(img_path_1).size[::-1], dtype=np.int32) else: print(f"Warning: Missing image {idx:06d}.") # LiDAR Information lidar_info = { "points_path": f"points/{idx:06d}.bin", "num_features": 4 # Update based on your dataset (default: x, y, z, intensity) } # Calibration Information calib_info = {} calib_path = root_path / f"calibs/{idx:06d}.txt" if self.calib and calib_path.exists(): with open(calib_path, 'r') as f: lines = f.readlines() calib_info = { "P0": np.array([float(v) for v in lines[0].split()]).reshape(3, 4), "P1": np.array([float(v) for v in lines[1].split()]).reshape(3, 4), "lidar2cam0": np.array([float(v) for v in lines[2].split()]).reshape(3, 4), "lidar2cam1": np.array([float(v) for v in lines[3].split()]).reshape(3, 4), } else: print(f"Warning: Missing calibration file for {idx:06d}.") # Label Information annos = None label_path = root_path / f"labels/{idx:06d}.txt" if self.label_info and label_path.exists(): with open(label_path, 'r') as f: lines = f.readlines() annos = { "name": [], "gt_boxes": [], "gt_classes": [] } # Define categories (modify based on your dataset) custom_categories = ('Pedestrian', 'Cyclist', 'Car') for line in lines: parts = line.strip().split() x, y, z, dx, dy, dz, yaw = map(float, parts[:7]) category_name = parts[7] if category_name in custom_categories: class_id = custom_categories.index(category_name) annos["name"].append(category_name) annos["gt_boxes"].append([x, y, z, dx, dy, dz, yaw]) annos["gt_classes"].append(class_id) annos["gt_boxes"] = np.array(annos["gt_boxes"], dtype=np.float32) if annos["gt_boxes"] else np.zeros((0, 7), dtype=np.float32) annos["gt_classes"] = np.array(annos["gt_classes"], dtype=np.int32) if annos["gt_classes"] else np.zeros((0,), dtype=np.int32) # Construct Final Info info["image"] = image_info info["point_cloud"] = lidar_info if self.calib: info["calib"] = calib_info if annos is not None: info["annos"] = annos return info def gather(self, image_ids): """Runs information gathering in parallel.""" image_infos = mmengine.track_parallel_progress(self.gather_single, image_ids, self.num_worker) return list(image_infos)

def custom_anno_to_label_file(annos, folder):
"""
Convert annotations into label files for your custom dataset.

Args: annos (list[dict]): List of annotation dictionaries for each sample. folder (str or Path): Path to save label files. Each label file follows the format: ``` x y z dx dy dz yaw category_name ``` """ folder = Path(folder) folder.mkdir(parents=True, exist_ok=True) # Ensure the folder exists for anno in annos: image_idx = anno['metadata']['image_idx'] # Get image index label_lines = [] for j in range(len(anno['name'])): label_line = f"{anno['location'][j][0]:.2f} {anno['location'][j][1]:.2f} {anno['location'][j][2]:.2f} " \ f"{anno['dimensions'][j][0]:.2f} {anno['dimensions'][j][1]:.2f} {anno['dimensions'][j][2]:.2f} " \ f"{anno['rotation_y'][j]:.2f} {anno['name'][j]}" label_lines.append(label_line) # Save the label file in the correct format label_file = folder / f'{image_idx:06d}.txt' with open(label_file, 'w') as f: f.write('\n'.join(label_lines))

def add_difficulty_to_annos(info):
"""
Adds difficulty levels based on bounding box size and distance.

custom dataset does not have occlusion/truncation, so we use a simpler metric. """ min_box_size = [1.5, 1.0, 0.5] # (Larger objects = Easier) max_distance = [20, 40, 70] # (Closer objects = Easier) annos = info['annos'] dims = annos['dimensions'] # (l, w, h) locations = annos['location'] # (x, y, z) position # Compute object distance from the sensor distances = np.linalg.norm(locations, axis=1) difficulties = [] for i, (dim, dist) in enumerate(zip(dims, distances)): size = max(dim) # Use max dimension as object size if size >= min_box_size[0] and dist <= max_distance[0]: difficulties.append(0) # Easy elif size >= min_box_size[1] and dist <= max_distance[1]: difficulties.append(1) # Moderate elif size >= min_box_size[2] and dist <= max_distance[2]: difficulties.append(2) # Hard else: difficulties.append(-1) # Ignore annos['difficulty'] = np.array(difficulties, dtype=np.int32) return difficulties

def custom_result_line(result_dict, precision=4):
"""
Converts a detection result dictionary into a formatted line for the custom dataset.

Args: result_dict (dict): Contains object detection results (bounding box, category, etc.). precision (int): Decimal precision for floating-point values. Returns: str: A formatted line for annotation output. """ prec_float = '{' + ':.{}f'.format(precision) + '}' res_line = [] all_field_default = OrderedDict([ ('gt_boxes', None), # 3D bounding box [x, y, z, dx, dy, dz, yaw] ('name', None), # Object category (Car, Pedestrian, etc.) ('score', 1.0), # Confidence score (default: 1.0 if not available) ('camera_id', -1) # Camera ID (-1 means unknown) ]) res_dict = OrderedDict([(key, None) for key in all_field_default.keys()]) # Fill in provided values for key, val in result_dict.items(): if key in all_field_default: res_dict[key] = val # Construct the output line gt_boxes = res_dict['gt_boxes'] if gt_boxes is not None: res_line += [prec_float.format(v) for v in gt_boxes] # Add [x, y, z, dx, dy, dz, yaw] res_line.append(res_dict['name']) # Object category res_line.append(str(res_dict['camera_id'])) # Camera ID (important for multi-camera) return ' '.join(res_line)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Customized dataset #3087

Uh oh!

{{title}}

Uh oh!

Replies: 0 comments

Select a reply

Uh oh!

Customized dataset #3087

Uh oh!

Vish19-code Mar 4, 2025

Copyright (c) OpenMMLab. All rights reserved.

Replies: 0 comments

Vish19-code
Mar 4, 2025