""" # -*- coding: utf-8 -*- ----------------------------------------------------------------------------------- # Refer: https://github.com/ghimiredhikura/Complex-YOLOv3 # Source : https://github.com/jeasinema/VoxelNet-tensorflow/blob/master/utils/utils.py """ import os import sys import math import numpy as np import torch src_dir = os.path.dirname(os.path.realpath(__file__)) # while not src_dir.endswith("sfa"): # src_dir = os.path.dirname(src_dir) if src_dir not in sys.path: sys.path.append(src_dir) from config import kitti_config as cnf def angle_in_limit(angle): # To limit the angle in -pi/2 - pi/2 limit_degree = 5 while angle >= np.pi / 2: angle -= np.pi while angle < -np.pi / 2: angle += np.pi if abs(angle + np.pi / 2) < limit_degree / 180 * np.pi: angle = np.pi / 2 return angle def camera_to_lidar(x, y, z, V2C=None, R0=None, P2=None): p = np.array([x, y, z, 1]) if V2C is None or R0 is None: p = np.matmul(cnf.R0_inv, p) p = np.matmul(cnf.Tr_velo_to_cam_inv, p) else: R0_i = np.zeros((4, 4)) R0_i[:3, :3] = R0 R0_i[3, 3] = 1 p = np.matmul(np.linalg.inv(R0_i), p) p = np.matmul(inverse_rigid_trans(V2C), p) p = p[0:3] return tuple(p) def lidar_to_camera(x, y, z, V2C=None, R0=None, P2=None): p = np.array([x, y, z, 1]) if V2C is None or R0 is None: p = np.matmul(cnf.Tr_velo_to_cam, p) p = np.matmul(cnf.R0, p) else: p = np.matmul(V2C, p) p = np.matmul(R0, p) p = p[0:3] return tuple(p) def camera_to_lidar_point(points): # (N, 3) -> (N, 3) N = points.shape[0] points = np.hstack([points, np.ones((N, 1))]).T # (N,4) -> (4,N) points = np.matmul(cnf.R0_inv, points) points = np.matmul(cnf.Tr_velo_to_cam_inv, points).T # (4, N) -> (N, 4) points = points[:, 0:3] return points.reshape(-1, 3) def lidar_to_camera_point(points, V2C=None, R0=None): # (N, 3) -> (N, 3) N = points.shape[0] points = np.hstack([points, np.ones((N, 1))]).T if V2C is None or R0 is None: points = np.matmul(cnf.Tr_velo_to_cam, points) points = np.matmul(cnf.R0, points).T else: points = np.matmul(V2C, points) points = np.matmul(R0, points).T points = points[:, 0:3] return points.reshape(-1, 3) def camera_to_lidar_box(boxes, V2C=None, R0=None, P2=None): # (N, 7) -> (N, 7) x,y,z,h,w,l,r ret = [] for box in boxes: x, y, z, h, w, l, ry = box # print(x, y, z, h, w, l, ry) (x, y, z), h, w, l, rz = camera_to_lidar(x, y, z, V2C=V2C, R0=R0, P2=P2), h, w, l, -ry - np.pi / 2 # print(x, y, z, h, w, l, ry) # print("camera_to_lidar") # rz = angle_in_limit(rz) ret.append([x, y, z, h, w, l, rz]) return np.array(ret).reshape(-1, 7) def lidar_to_camera_box(boxes, V2C=None, R0=None, P2=None): # (N, 7) -> (N, 7) x,y,z,h,w,l,r ret = [] for box in boxes: x, y, z, h, w, l, rz = box # (x, y, z), h, w, l, ry = lidar_to_camera(x, y, z, V2C=V2C, R0=R0, P2=P2), h, w, l, -rz - np.pi / 2 # ry = angle_in_limit(ry) ry = -rz - np.pi / 2 ret.append([x, y, z, h, w, l, ry]) return np.array(ret).reshape(-1, 7) def center_to_corner_box2d(boxes_center, coordinate='lidar'): # (N, 5) -> (N, 4, 2) N = boxes_center.shape[0] boxes3d_center = np.zeros((N, 7)) boxes3d_center[:, [0, 1, 4, 5, 6]] = boxes_center boxes3d_corner = center_to_corner_box3d(boxes3d_center, coordinate=coordinate) return boxes3d_corner[:, 0:4, 0:2] def center_to_corner_box3d(boxes_center, coordinate='lidar'): # (N, 7) -> (N, 8, 3) N = boxes_center.shape[0] ret = np.zeros((N, 8, 3), dtype=np.float32) if coordinate == 'camera': boxes_center = camera_to_lidar_box(boxes_center) for i in range(N): box = boxes_center[i] translation = box[0:3] size = box[3:6] rotation = [0, 0, box[-1]] h, w, l = size[0], size[1], size[2] trackletBox = np.array([ # in velodyne coordinates around zero point and without orientation yet [-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2], \ [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], \ [0, 0, 0, 0, h, h, h, h]]) # re-create 3D bounding box in velodyne coordinate system yaw = rotation[2] rotMat = np.array([ [np.cos(yaw), -np.sin(yaw), 0.0], [np.sin(yaw), np.cos(yaw), 0.0], [0.0, 0.0, 1.0]]) cornerPosInVelo = np.dot(rotMat, trackletBox) + np.tile(translation, (8, 1)).T box3d = cornerPosInVelo.transpose() ret[i] = box3d if coordinate == 'camera': for idx in range(len(ret)): ret[idx] = lidar_to_camera_point(ret[idx]) return ret CORNER2CENTER_AVG = True def corner_to_center_box3d(boxes_corner, coordinate='camera'): # (N, 8, 3) -> (N, 7) x,y,z,h,w,l,ry/z if coordinate == 'lidar': for idx in range(len(boxes_corner)): boxes_corner[idx] = lidar_to_camera_point(boxes_corner[idx]) ret = [] for roi in boxes_corner: if CORNER2CENTER_AVG: # average version roi = np.array(roi) h = abs(np.sum(roi[:4, 1] - roi[4:, 1]) / 4) w = np.sum( np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2)) ) / 4 l = np.sum( np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2)) ) / 4 x = np.sum(roi[:, 0], axis=0) / 8 y = np.sum(roi[0:4, 1], axis=0) / 4 z = np.sum(roi[:, 2], axis=0) / 8 ry = np.sum( math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) ) / 8 if w > l: w, l = l, w ry = ry - np.pi / 2 elif l > w: l, w = w, l ry = ry - np.pi / 2 ret.append([x, y, z, h, w, l, ry]) else: # max version h = max(abs(roi[:4, 1] - roi[4:, 1])) w = np.max( np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2)) ) l = np.max( np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) + np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2)) ) x = np.sum(roi[:, 0], axis=0) / 8 y = np.sum(roi[0:4, 1], axis=0) / 4 z = np.sum(roi[:, 2], axis=0) / 8 ry = np.sum( math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) ) / 8 if w > l: w, l = l, w ry = angle_in_limit(ry + np.pi / 2) ret.append([x, y, z, h, w, l, ry]) if coordinate == 'lidar': ret = camera_to_lidar_box(np.array(ret)) return np.array(ret) def point_transform(points, tx, ty, tz, rx=0, ry=0, rz=0): # Input: # points: (N, 3) # rx/y/z: in radians # Output: # points: (N, 3) N = points.shape[0] points = np.hstack([points, np.ones((N, 1))]) mat1 = np.eye(4) mat1[3, 0:3] = tx, ty, tz points = np.matmul(points, mat1) if rx != 0: mat = np.zeros((4, 4)) mat[0, 0] = 1 mat[3, 3] = 1 mat[1, 1] = np.cos(rx) mat[1, 2] = -np.sin(rx) mat[2, 1] = np.sin(rx) mat[2, 2] = np.cos(rx) points = np.matmul(points, mat) if ry != 0: mat = np.zeros((4, 4)) mat[1, 1] = 1 mat[3, 3] = 1 mat[0, 0] = np.cos(ry) mat[0, 2] = np.sin(ry) mat[2, 0] = -np.sin(ry) mat[2, 2] = np.cos(ry) points = np.matmul(points, mat) if rz != 0: mat = np.zeros((4, 4)) mat[2, 2] = 1 mat[3, 3] = 1 mat[0, 0] = np.cos(rz) mat[0, 1] = -np.sin(rz) mat[1, 0] = np.sin(rz) mat[1, 1] = np.cos(rz) points = np.matmul(points, mat) return points[:, 0:3] def box_transform(boxes, tx, ty, tz, r=0, coordinate='lidar'): # Input: # boxes: (N, 7) x y z h w l rz/y # Output: # boxes: (N, 7) x y z h w l rz/y boxes_corner = center_to_corner_box3d(boxes, coordinate=coordinate) # (N, 8, 3) for idx in range(len(boxes_corner)): if coordinate == 'lidar': boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, rz=r) else: boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, ry=r) return corner_to_center_box3d(boxes_corner, coordinate=coordinate) def inverse_rigid_trans(Tr): ''' Inverse a rigid body transform matrix (3x4 as [R|t]) [R'|-R't; 0|1] ''' inv_Tr = np.zeros_like(Tr) # 3x4 inv_Tr[0:3, 0:3] = np.transpose(Tr[0:3, 0:3]) inv_Tr[0:3, 3] = np.dot(-np.transpose(Tr[0:3, 0:3]), Tr[0:3, 3]) return inv_Tr class Compose(object): def __init__(self, transforms, p=1.0): self.transforms = transforms self.p = p def __call__(self, lidar, labels): if np.random.random() <= self.p: for t in self.transforms: lidar, labels = t(lidar, labels) return lidar, labels class OneOf(object): def __init__(self, transforms, p=1.0): self.transforms = transforms self.p = p def __call__(self, lidar, labels): if np.random.random() <= self.p: choice = np.random.randint(low=0, high=len(self.transforms)) lidar, labels = self.transforms[choice](lidar, labels) return lidar, labels class Random_Rotation(object): def __init__(self, limit_angle=np.pi / 4, p=0.5): self.limit_angle = limit_angle self.p = p def __call__(self, lidar, labels): """ :param labels: # (N', 7) x, y, z, h, w, l, r :return: """ if np.random.random() <= self.p: angle = np.random.uniform(-self.limit_angle, self.limit_angle) lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz=angle) labels = box_transform(labels, 0, 0, 0, r=angle, coordinate='lidar') return lidar, labels class Random_Scaling(object): def __init__(self, scaling_range=(0.95, 1.05), p=0.5): self.scaling_range = scaling_range self.p = p def __call__(self, lidar, labels): """ :param labels: # (N', 7) x, y, z, h, w, l, r :return: """ if np.random.random() <= self.p: factor = np.random.uniform(self.scaling_range[0], self.scaling_range[0]) lidar[:, 0:3] = lidar[:, 0:3] * factor labels[:, 0:6] = labels[:, 0:6] * factor return lidar, labels class Cutout(object): """Randomly mask out one or more patches from an image. Args: n_holes (int): Number of patches to cut out of each image. length (int): The length (in pixels) of each square patch. Refer from: https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py """ def __init__(self, n_holes, ratio, fill_value=0., p=1.0): self.n_holes = n_holes self.ratio = ratio assert 0. <= fill_value <= 1., "the fill value is in a range of 0 to 1" self.fill_value = fill_value self.p = p def __call__(self, img, targets): """ Args: img (Tensor): Tensor image of size (C, H, W). Returns: Tensor: Image with n_holes of dimension length x length cut out of it. """ if np.random.random() <= self.p: h = img.size(1) w = img.size(2) h_cutout = int(self.ratio * h) w_cutout = int(self.ratio * w) for n in range(self.n_holes): y = np.random.randint(h) x = np.random.randint(w) y1 = np.clip(y - h_cutout // 2, 0, h) y2 = np.clip(y + h_cutout // 2, 0, h) x1 = np.clip(x - w_cutout // 2, 0, w) x2 = np.clip(x + w_cutout // 2, 0, w) img[:, y1: y2, x1: x2] = self.fill_value # Zero out the selected area # Remove targets that are in the selected area keep_target = [] for target_idx, target in enumerate(targets): _, _, target_x, target_y, target_w, target_l, _, _ = target if (x1 <= target_x * w <= x2) and (y1 <= target_y * h <= y2): continue keep_target.append(target_idx) targets = targets[keep_target] return img, targets