Table Of Contents
Table Of Contents

Source code for gluoncv.data.transforms.bbox

"""Bounding boxes transformation functions."""
from __future__ import division
import numpy as np

# Names exported by ``from <module> import *``. The helpers ``get_rot_dir`` and
# ``get_3rd_point`` defined in this module are not part of this list.
__all__ = ['crop', 'flip', 'resize', 'translate', 'affine_transform', 'get_affine_transform']

def crop(bbox, crop_box=None, allow_outside_center=True):
    """Crop bounding boxes according to slice area.

    This method is mainly used with image cropping to ensure bounding boxes fit
    within the cropped image.

    Parameters
    ----------
    bbox : numpy.ndarray
        Numpy.ndarray with shape (N, 4+) where N is the number of bounding boxes.
        The second axis represents attributes of the bounding box.
        Specifically, these are :math:`(x_{min}, y_{min}, x_{max}, y_{max})`,
        we allow additional attributes other than coordinates, which stay intact
        during bounding box transformations.
    crop_box : tuple
        Tuple of length 4. :math:`(x_{min}, y_{min}, width, height)`.
        A falsy ``x_min``/``y_min`` (``None`` or 0) is treated as 0, and a falsy
        ``width``/``height`` (``None`` or 0) leaves that side unbounded.
        If ``crop_box`` is ``None`` or all four entries are ``None``, a copy of
        ``bbox`` is returned unchanged.
    allow_outside_center : bool
        If `False`, remove bounding boxes which have centers outside cropping area.

    Returns
    -------
    numpy.ndarray
        Cropped bounding boxes with shape (M, 4+) where M <= N. Coordinates are
        relative to the top-left corner of the crop area. The input is not
        modified.

    Raises
    ------
    ValueError
        If ``crop_box`` does not have exactly 4 entries.

    """
    bbox = bbox.copy()
    if crop_box is None:
        return bbox
    if not len(crop_box) == 4:
        raise ValueError(
            "Invalid crop_box parameter, requires length 4, given {}".format(str(crop_box)))
    if all(c is None for c in crop_box):
        return bbox

    x0, y0, width, height = crop_box

    left = x0 if x0 else 0
    top = y0 if y0 else 0
    right = left + (width if width else np.inf)
    bottom = top + (height if height else np.inf)

    # Keep the origin separate from the far edge: an unbounded side puts
    # ``np.inf`` into the far edge, which forces a float dtype. Mixing that into
    # the origin would make the in-place subtraction below fail for
    # integer-typed ``bbox`` even when the offsets themselves are integers.
    origin = np.array((left, top))
    far_edge = np.array((right, bottom))

    if allow_outside_center:
        mask = np.ones(bbox.shape[0], dtype=bool)
    else:
        # Centers are taken from the boxes *before* clamping.
        centers = (bbox[:, :2] + bbox[:, 2:4]) / 2
        mask = np.logical_and(origin <= centers, centers < far_edge).all(axis=1)

    # Clamp the boxes to the crop area ...
    bbox[:, :2] = np.maximum(bbox[:, :2], origin)
    bbox[:, 2:4] = np.minimum(bbox[:, 2:4], far_edge)
    # ... and express them relative to the crop's top-left corner.
    bbox[:, :2] -= origin
    bbox[:, 2:4] -= origin

    # Drop boxes that collapsed to zero or negative extent after clamping.
    mask = np.logical_and(mask, (bbox[:, :2] < bbox[:, 2:4]).all(axis=1))
    return bbox[mask]
def flip(bbox, size, flip_x=False, flip_y=False):
    """Mirror bounding boxes to follow a horizontal and/or vertical image flip.

    Parameters
    ----------
    bbox : numpy.ndarray
        Array of shape (N, 4+), one row per box. The first four columns are
        :math:`(x_{min}, y_{min}, x_{max}, y_{max})`; any further columns are
        carried over untouched.
    size : tuple
        Tuple of length 2: (width, height) of the image being flipped.
    flip_x : bool
        Whether to flip horizontally.
    flip_y : bool
        Whether to flip vertically.

    Returns
    -------
    numpy.ndarray
        Flipped copy of ``bbox`` with the original shape.

    Raises
    ------
    ValueError
        If ``size`` does not have exactly 2 entries.

    """
    if len(size) != 2:
        raise ValueError("size requires length 2 tuple, given {}".format(len(size)))
    img_w, img_h = size

    flipped = bbox.copy()
    if flip_y:
        # New (ymin, ymax) is the mirror image of old (ymax, ymin). The indexed
        # right-hand side is a copy, so the swap is safe.
        flipped[:, [1, 3]] = img_h - flipped[:, [3, 1]]
    if flip_x:
        # Same mirroring along the x axis.
        flipped[:, [0, 2]] = img_w - flipped[:, [2, 0]]
    return flipped
def resize(bbox, in_size, out_size):
    """Rescale bounding boxes to follow an image resize.

    Parameters
    ----------
    bbox : numpy.ndarray
        Array of shape (N, 4+), one row per box. The first four columns are
        :math:`(x_{min}, y_{min}, x_{max}, y_{max})`; any further columns are
        carried over (converted to float along with the rest of the array).
    in_size : tuple
        Tuple of length 2: (width, height) of the image before resizing.
    out_size : tuple
        Tuple of length 2: (width, height) of the image after resizing.

    Returns
    -------
    numpy.ndarray
        Float copy of ``bbox`` with the coordinates scaled, original shape.

    Raises
    ------
    ValueError
        If ``in_size`` or ``out_size`` does not have exactly 2 entries.

    """
    if len(in_size) != 2:
        raise ValueError("in_size requires length 2 tuple, given {}".format(len(in_size)))
    if len(out_size) != 2:
        raise ValueError("out_size requires length 2 tuple, given {}".format(len(out_size)))

    scaled = bbox.copy()
    scaled = scaled.astype(float)

    # Per-axis ratio between the output and the input image size.
    ratio_x = out_size[0] / in_size[0]
    ratio_y = out_size[1] / in_size[1]

    scaled[:, [1, 3]] *= ratio_y  # ymin, ymax
    scaled[:, [0, 2]] *= ratio_x  # xmin, xmax
    return scaled
def translate(bbox, x_offset=0, y_offset=0):
    """Shift bounding boxes by a fixed offset along each axis.

    Parameters
    ----------
    bbox : numpy.ndarray
        Array of shape (N, 4+), one row per box. The first four columns are
        :math:`(x_{min}, y_{min}, x_{max}, y_{max})`; any further columns are
        carried over untouched.
    x_offset : int or float
        Offset added to both x coordinates.
    y_offset : int or float
        Offset added to both y coordinates.

    Returns
    -------
    numpy.ndarray
        Translated copy of ``bbox`` with the original shape.

    """
    moved = bbox.copy()
    # The same (x, y) shift applies to the min corner and the max corner, so
    # all four coordinate columns are updated in a single in-place addition.
    moved[:, :4] += (x_offset, y_offset) * 2
    return moved
def affine_transform(pt, t):
    """Apply a 2x3 affine transform matrix to a single 2D point.

    Parameters
    ----------
    pt : numpy.ndarray
        Point to transform; ``pt[0]`` and ``pt[1]`` are read as its two
        coordinates.
    t : numpy.ndarray
        Transformation matrix with shape (2, 3).

    Returns
    -------
    numpy.ndarray
        The first two entries of ``t`` applied to the homogeneous point.

    """
    # Lift the point to homogeneous coordinates (x, y, 1) in float32 so the
    # translation column of ``t`` is applied by the matrix product.
    homogeneous = np.asarray((pt[0], pt[1], 1.), dtype=np.float32)
    return np.dot(t, homogeneous)[:2]
def get_rot_dir(src_point, rot_rad):
    """Rotate a 2D point about the origin.

    Parameters
    ----------
    src_point : tuple of float
        Original point; only its first two entries are used.
    rot_rad : float
        Rotation angle in radians.

    Returns
    -------
    list of float
        The rotated ``[x, y]`` coordinates.

    """
    sin_r = np.sin(rot_rad)
    cos_r = np.cos(rot_rad)
    x, y = src_point[0], src_point[1]
    # Standard 2D rotation matrix applied to (x, y).
    return [x * cos_r - y * sin_r, x * sin_r + y * cos_r]


def get_3rd_point(a, b):
    """Derive a third point from two given points.

    The result is ``b`` plus the vector ``a - b`` turned by 90 degrees, i.e.
    ``(-dy, dx)`` for ``(dx, dy) = a - b``.

    Parameters
    ----------
    a : numpy.ndarray
        First point. Must support element-wise subtraction with ``b``.
    b : numpy.ndarray
        Second point.

    Returns
    -------
    numpy.ndarray
        Third point.

    """
    delta = a - b
    perpendicular = np.array([-delta[1], delta[0]], dtype=np.float32)
    return b + perpendicular
def get_affine_transform(center, scale, rot, output_size,
                         shift=np.array([0, 0], dtype=np.float32), inv=0):
    """Build the affine matrix for a given center, scale and rotation.

    Three point correspondences are built (the center, a point offset from
    the center along the rotated direction, and a third point perpendicular
    to those two) and handed to OpenCV's ``getAffineTransform``.

    Parameters
    ----------
    center : numpy.ndarray
        Center point in the source image.
    scale : float, list or numpy.ndarray
        Scaling factor. Anything that is neither a list nor a numpy array is
        duplicated into a 2-element float32 array. Only the first entry sets
        the source width.
    rot : float
        Rotation in degrees.
    output_size : tuple of int
        (width, height) of the output size.
    shift : numpy.ndarray
        Shift factor, multiplied element-wise by ``scale`` and added to the
        source points.
    inv : bool
        If truthy, return the transform mapping output points back to source
        points instead of source to output.

    Returns
    -------
    numpy.ndarray
        Affine matrix as returned by ``cv2.getAffineTransform``.

    """
    from ...utils.filesystem import try_import_cv2
    cv2 = try_import_cv2()

    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale], dtype=np.float32)

    out_w = output_size[0]
    out_h = output_size[1]

    # Direction vectors pointing half a width "up" from the center: rotated in
    # the source frame, axis-aligned in the destination frame.
    angle = np.pi * rot / 180
    src_dir = get_rot_dir([0, scale[0] * -0.5], angle)
    dst_dir = np.array([0, out_w * -0.5], np.float32)

    offset = scale * shift
    out_center = np.array([out_w * 0.5, out_h * 0.5], np.float32)

    src_pts = np.zeros((3, 2), dtype=np.float32)
    dst_pts = np.zeros((3, 2), dtype=np.float32)

    src_pts[0, :] = center + offset
    src_pts[1, :] = center + src_dir + offset
    src_pts[2:, :] = get_3rd_point(src_pts[0, :], src_pts[1, :])

    dst_pts[0, :] = out_center
    dst_pts[1, :] = out_center + dst_dir
    dst_pts[2:, :] = get_3rd_point(dst_pts[0, :], dst_pts[1, :])

    # Swap the roles of source and destination for the inverse mapping.
    origin_pts, target_pts = (dst_pts, src_pts) if inv else (src_pts, dst_pts)
    return cv2.getAffineTransform(np.float32(origin_pts), np.float32(target_pts))