Source code for gluoncv.nn.bbox
# pylint: disable=arguments-differ
"""Bounding boxes operators"""
from __future__ import absolute_import
import numpy as np
from mxnet import gluon
class NumPyBBoxCornerToCenter(object):
    """Convert corner-encoded boxes to center-encoded boxes with numpy.

    Corner encoding is (xmin, ymin, xmax, ymax); center encoding is
    (center_x, center_y, width, height).

    Parameters
    ----------
    axis : int, default is -1
        Axis holding the 4 box coordinates. Default is -1 (the last dimension).
    split : bool, default is False
        Whether to return the four components separately instead of
        concatenating them back along `axis`.

    Returns
    -------
    A numpy array with the same shape as the input if split is False,
    otherwise a 4-tuple (center_x, center_y, width, height) of arrays whose
    `axis` dimension has length 1.

    """

    def __init__(self, axis=-1, split=False):
        super(NumPyBBoxCornerToCenter, self).__init__()
        self._split = split
        self._axis = axis

    def __call__(self, x):
        """Convert the corner boxes in `x` to center format."""
        left, top, right, bottom = np.split(x, 4, axis=self._axis)
        # No "+1" is added to the extents: the nms and box-iou code in this
        # project does not use it either (this differs from detectron).
        box_w = right - left
        box_h = bottom - top
        center_x = left + box_w * 0.5
        center_y = top + box_h * 0.5
        if self._split:
            return center_x, center_y, box_w, box_h
        return np.concatenate((center_x, center_y, box_w, box_h), axis=self._axis)
class BBoxCornerToCenter(gluon.HybridBlock):
    """Convert corner-encoded boxes to center-encoded boxes.

    Corner encoding is (xmin, ymin, xmax, ymax); center encoding is
    (center_x, center_y, width, height).

    Parameters
    ----------
    axis : int, default is -1
        Axis holding the 4 box coordinates. Default is -1 (the last dimension).
    split : bool, default is False
        Whether to return the four components separately instead of
        concatenating them back along `axis`.

    Returns
    -------
    A BxNx4 NDArray if split is False, or 4 BxNx1 NDArray if split is True

    """

    def __init__(self, axis=-1, split=False):
        super(BBoxCornerToCenter, self).__init__()
        self._split = split
        self._axis = axis

    def hybrid_forward(self, F, x):
        """Hybrid forward: convert the corner boxes in `x` to center format."""
        left, top, right, bottom = F.split(x, axis=self._axis, num_outputs=4)
        # No "+1" is added to the extents: the nms and box-iou code in this
        # project does not use it either (this differs from detectron).
        box_w = right - left
        box_h = bottom - top
        center_x = left + box_w * 0.5
        center_y = top + box_h * 0.5
        if self._split:
            return center_x, center_y, box_w, box_h
        return F.concat(center_x, center_y, box_w, box_h, dim=self._axis)
class BBoxCenterToCorner(gluon.HybridBlock):
    """Convert center-encoded boxes to corner-encoded boxes.

    Corner encoding is (xmin, ymin, xmax, ymax); center encoding is
    (center_x, center_y, width, height).

    Parameters
    ----------
    axis : int, default is -1
        Axis holding the 4 box coordinates. Default is -1 (the last dimension).
    split : bool, default is False
        Whether to return the four components separately instead of
        concatenating them back along `axis`.

    Returns
    -------
    A BxNx4 NDArray if split is False, or 4 BxNx1 NDArray if split is True.

    """

    def __init__(self, axis=-1, split=False):
        super(BBoxCenterToCorner, self).__init__()
        self._split = split
        self._axis = axis

    def hybrid_forward(self, F, x):
        """Hybrid forward: convert the center boxes in `x` to corner format."""
        center_x, center_y, box_w, box_h = F.split(x, axis=self._axis, num_outputs=4)
        half_w = box_w * 0.5
        half_h = box_h * 0.5
        left = center_x - half_w
        top = center_y - half_h
        right = center_x + half_w
        bottom = center_y + half_h
        if self._split:
            return left, top, right, bottom
        return F.concat(left, top, right, bottom, dim=self._axis)
class BBoxSplit(gluon.HybridBlock):
    """Split bounding boxes into 4 columns.

    Parameters
    ----------
    axis : int, default is -1
        On which axis to split the bounding box. Default is -1(the last dimension).
    squeeze_axis : boolean, default is `False`
        If true, Removes the axis with length 1 from the shapes of the output arrays.
        **Note** that setting `squeeze_axis` to ``true`` removes axis with length 1 only
        along the `axis` which it is split.
        Also `squeeze_axis` can be set to ``true`` only if ``input.shape[axis] == num_outputs``.

    """

    # `axis` now defaults to -1, as the docstring has always advertised;
    # existing positional and keyword callers are unaffected.
    def __init__(self, axis=-1, squeeze_axis=False, **kwargs):
        super(BBoxSplit, self).__init__(**kwargs)
        self._axis = axis
        self._squeeze_axis = squeeze_axis

    def hybrid_forward(self, F, x):
        """Hybrid forward: split `x` into 4 outputs along the configured axis."""
        return F.split(x, axis=self._axis, num_outputs=4, squeeze_axis=self._squeeze_axis)
class BBoxArea(gluon.HybridBlock):
    """Calculate the area of bounding boxes.

    Parameters
    ----------
    fmt : str, default is corner
        Bounding box format, can be {'center', 'corner'}.
        'center': {x, y, width, height}
        'corner': {xmin, ymin, xmax, ymax}
    axis : int, default is -1
        Effective axis of the bounding box. Default is -1(the last dimension).

    Returns
    -------
    A BxNx1 NDArray

    Raises
    ------
    ValueError
        If `fmt` is neither 'corner' nor 'center' (case-insensitive).

    """

    def __init__(self, axis=-1, fmt='corner', **kwargs):
        super(BBoxArea, self).__init__(**kwargs)
        fmt_key = fmt.lower()
        if fmt_key == 'corner':
            # Forward `axis` so a non-default box axis is honoured for corner
            # boxes as well (it used to be dropped, always splitting on -1).
            self._pre = BBoxCornerToCenter(axis=axis, split=True)
        elif fmt_key == 'center':
            self._pre = BBoxSplit(axis=axis)
        else:
            raise ValueError("Unsupported format: {}. Use 'corner' or 'center'.".format(fmt))

    def hybrid_forward(self, F, x):
        """Hybrid forward: return width * height of every box in `x`."""
        _, _, width, height = self._pre(x)
        # Degenerate boxes (non-positive extent) contribute zero area rather
        # than a negative or sign-flipped product.
        width = F.where(width > 0, width, F.zeros_like(width))
        height = F.where(height > 0, height, F.zeros_like(height))
        return width * height
class BBoxBatchIOU(gluon.HybridBlock):
    """Batch Bounding Box IOU.

    Parameters
    ----------
    axis : int
        On which axis is the length-4 bounding box dimension.
    fmt : str
        BBox encoding format, can be 'corner' or 'center'.
        'corner': (xmin, ymin, xmax, ymax)
        'center': (center_x, center_y, width, height)
    offset : float, default is 0
        Offset is used if +1 is desired for computing width and height, otherwise use 0.
    eps : float, default is 1e-15
        Very small number to avoid division by 0.

    Raises
    ------
    ValueError
        If `fmt` is neither 'corner' nor 'center' (case-insensitive).

    """

    def __init__(self, axis=-1, fmt='corner', offset=0, eps=1e-15, **kwargs):
        super(BBoxBatchIOU, self).__init__(**kwargs)
        self._offset = offset
        self._eps = eps
        self._axis = axis
        fmt_key = fmt.lower()
        if fmt_key == 'center':
            # BBoxCenterToCorner keeps the (length-1) box axis on each output,
            # so it has to be squeezed in `_corners` to match the corner path.
            self._pre = BBoxCenterToCorner(axis=axis, split=True)
            self._squeeze = True
        elif fmt_key == 'corner':
            self._pre = BBoxSplit(axis=axis, squeeze_axis=True)
            self._squeeze = False
        else:
            raise ValueError("Unsupported format: {}. Use 'corner' or 'center'.".format(fmt))

    def _corners(self, F, boxes):
        """Return (left, top, right, bottom) with the box axis removed."""
        parts = self._pre(boxes)
        if self._squeeze:
            parts = [F.squeeze(part, axis=self._axis) for part in parts]
        return parts

    def hybrid_forward(self, F, a, b):
        """Compute IOU for each batch

        Parameters
        ----------
        a : mxnet.nd.NDArray or mxnet.sym.Symbol
            (B, N, 4) first input.
        b : mxnet.nd.NDArray or mxnet.sym.Symbol
            (B, M, 4) second input.

        Returns
        -------
        mxnet.nd.NDArray or mxnet.sym.Symbol
            (B, N, M) array of IOUs.

        """
        al, at, ar, ab = self._corners(F, a)
        bl, bt, br, bb = self._corners(F, b)

        # Pairwise intersection rectangle: (B, N, 1) against (B, 1, M)
        # broadcasts to (B, N, M).
        left = F.broadcast_maximum(al.expand_dims(-1), bl.expand_dims(-2))
        right = F.broadcast_minimum(ar.expand_dims(-1), br.expand_dims(-2))
        top = F.broadcast_maximum(at.expand_dims(-1), bt.expand_dims(-2))
        bot = F.broadcast_minimum(ab.expand_dims(-1), bb.expand_dims(-2))

        # clip with (0, float16.max): disjoint boxes give a zero extent
        iw = F.clip(right - left + self._offset, a_min=0, a_max=6.55040e+04)
        ih = F.clip(bot - top + self._offset, a_min=0, a_max=6.55040e+04)
        i = iw * ih

        # areas
        area_a = ((ar - al + self._offset) * (ab - at + self._offset)).expand_dims(-1)
        area_b = ((br - bl + self._offset) * (bb - bt + self._offset)).expand_dims(-2)
        union = F.broadcast_add(area_a, area_b) - i

        # eps keeps the ratio finite when the union is zero
        return i / (union + self._eps)
class BBoxClipToImage(gluon.HybridBlock):
    """Clip bounding box coordinates to image boundaries.

    If multiple images are supplied and padded, must have additional inputs
    of accurate image shape.

    """

    def __init__(self, **kwargs):
        super(BBoxClipToImage, self).__init__(**kwargs)

    def hybrid_forward(self, F, x, img):
        """Clip `x` to the range [0, image size] taken from the shape of `img`.

        Parameters
        ----------
        x: (B, N, 4) Bounding box coordinates.
        img: (B, C, H, W) Image tensor.

        Returns
        -------
        (B, N, 4) Bounding box coordinates.

        """
        # Lower bound: no coordinate may be negative.
        non_negative = F.maximum(x, 0.0)
        # Shape vector of `img` from index 2 onward; for the documented
        # (B, C, H, W) layout that is (H, W). A leading axis is added -> (1, 2).
        spatial = F.shape_array(img).slice_axis(axis=0, begin=2, end=None)
        height_width = spatial.expand_dims(0)
        # Reverse to (W, H) and repeat to (W, H, W, H) so the bound lines up
        # with the 4 box coordinates, then reshape (1, 4) -> (1, 1, 4) so it
        # broadcasts against the (B, N, 4) boxes.
        width_height = F.reverse(height_width, axis=1)
        upper = F.tile(width_height, reps=(2,)).reshape((0, -4, 1, -1))
        return F.broadcast_minimum(non_negative, F.cast(upper, dtype='float32'))