# Source code for gluoncv.data.transforms.image
"""Extended image transformations to `mxnet.image`."""
from __future__ import division
import random
import numpy as np
import mxnet as mx
from mxnet import nd
from mxnet.base import numeric_types
__all__ = ['imresize', 'resize_long', 'resize_short_within',
'random_pca_lighting', 'random_expand', 'random_flip',
'resize_contain', 'ten_crop']
def imresize(src, w, h, interp=1):
    """Resize image with OpenCV.

    This is a duplicate of mxnet.image.imresize for name space consistency.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Source image in HWC layout.
    w : int, required
        Width of resized image.
    h : int, required
        Height of resized image.
    interp : int, optional, default=1
        Interpolation method (default=cv2.INTER_LINEAR).

    Returns
    -------
    mxnet.nd.NDArray
        The resized image.

    Examples
    --------
    >>> import mxnet as mx
    >>> from gluoncv import data as gdata
    >>> img = mx.random.uniform(0, 255, (300, 300, 3)).astype('uint8')
    >>> print(img.shape)
    (300, 300, 3)
    >>> img = gdata.transforms.image.imresize(img, 200, 200)
    >>> print(img.shape)
    (200, 200, 3)
    """
    # Local import: _get_interp_method is a private mxnet helper that resolves
    # interp codes 9/10 (auto / random selection) based on old and new sizes.
    from mxnet.image.image import _get_interp_method as get_interp
    oh, ow, _ = src.shape
    return mx.image.imresize(src, w, h, interp=get_interp(interp, (oh, ow, h, w)))
def resize_long(src, size, interp=2):
    """Resizes longer edge to size.

    Note: `resize_long` uses OpenCV (not the CV2 Python library).
    MXNet must have been built with OpenCV for `resize_long` to work.
    Resizes the original image by setting the longer edge to size
    and setting the shorter edge accordingly. This will ensure the new image will
    fit into the `size` specified.
    Resizing function is called from OpenCV.

    Parameters
    ----------
    src : NDArray
        The original image.
    size : int
        The length to be set for the longer edge.
    interp : int, optional, default=2
        Interpolation method used for resizing the image.
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method mentioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look best
        with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    Returns
    -------
    NDArray
        An 'NDArray' containing the resized image.

    Example
    -------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    <NDArray 2321x3482x3 @cpu(0)>
    >>> size = 640
    >>> new_image = resize_long(image, size)
    >>> new_image
    <NDArray 426x640x3 @cpu(0)>
    """
    from mxnet.image.image import _get_interp_method as get_interp
    h, w, _ = src.shape
    # Pin the longer edge to `size`; scale the other edge proportionally.
    if h > w:
        new_h, new_w = size, size * w // h
    else:
        new_h, new_w = size * h // w, size
    return imresize(src, new_w, new_h, interp=get_interp(interp, (h, w, new_h, new_w)))
def resize_short_within(src, short, max_size, mult_base=1, interp=2):
    """Resizes shorter edge to size but make sure it's capped at maximum size.

    Note: `resize_short_within` uses OpenCV (not the CV2 Python library).
    MXNet must have been built with OpenCV for `resize_short_within` to work.
    Resizes the original image by setting the shorter edge to size
    and setting the longer edge accordingly. Also this function will ensure
    the new image will not exceed ``max_size`` even at the longer side.
    Resizing function is called from OpenCV.

    Parameters
    ----------
    src : NDArray
        The original image.
    short : int
        Resize shorter side to ``short``.
    max_size : int
        Make sure the longer side of new image is smaller than ``max_size``.
    mult_base : int, default is 1
        Width and height are rounded to multiples of `mult_base`.
    interp : int, optional, default=2
        Interpolation method used for resizing the image.
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method mentioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look best
        with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    Returns
    -------
    NDArray
        An 'NDArray' containing the resized image.

    Example
    -------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    <NDArray 2321x3482x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1000)
    >>> new_image
    <NDArray 667x1000x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1200)
    >>> new_image
    <NDArray 800x1200x3 @cpu(0)>
    >>> new_image = resize_short_within(image, short=800, max_size=1200, mult_base=32)
    >>> new_image
    <NDArray 800x1184x3 @cpu(0)>
    """
    from mxnet.image.image import _get_interp_method as get_interp
    h, w, _ = src.shape
    shorter, longer = min(w, h), max(w, h)
    # Scale so the shorter side becomes `short` ...
    scale = float(short) / float(shorter)
    # ... unless that would push the (rounded) longer side past max_size,
    # in which case shrink the scale to cap the longer side instead.
    if np.round(scale * longer / mult_base) * mult_base > max_size:
        scale = float(np.floor(max_size / mult_base) * mult_base) / float(longer)
    new_w = int(np.round(w * scale / mult_base) * mult_base)
    new_h = int(np.round(h * scale / mult_base) * mult_base)
    return imresize(src, new_w, new_h, interp=get_interp(interp, (h, w, new_h, new_w)))
def random_pca_lighting(src, alphastd, eigval=None, eigvec=None):
    """Apply random pca lighting noise to input image.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image with HWC format. Note: modified in place when noise
        is applied (``alphastd > 0``).
    alphastd : float
        Noise level [0, 1) for image with range [0, 255]. Values <= 0
        disable the augmentation and return ``src`` unchanged.
    eigval : list of floats.
        Eigen values, defaults to [55.46, 4.794, 1.148].
    eigvec : nested lists of floats
        Eigen vectors with shape (3, 3), defaults to
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]].

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    """
    if alphastd <= 0:
        # Augmentation disabled: return input untouched.
        return src
    if eigval is None:
        eigval = np.array([55.46, 4.794, 1.148])
    if eigvec is None:
        eigvec = np.array([[-0.5675, 0.7192, 0.4009],
                           [-0.5808, -0.0045, -0.8140],
                           [-0.5836, -0.6948, 0.4203]])
    # Sample one alpha per channel, then project through the eigen basis to
    # obtain a per-channel RGB shift (AlexNet-style PCA lighting).
    alpha = np.random.normal(0, alphastd, size=(3,))
    rgb = np.dot(eigvec * alpha, eigval)
    src += nd.array(rgb, ctx=src.context)
    return src
def random_expand(src, max_ratio=4, fill=0, keep_ratio=True):
    """Random expand original image with borders, this is identical to placing
    the original image on a larger canvas.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    max_ratio : int or float
        Maximum ratio of the output image on both direction(vertical and horizontal)
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, RGB channels
        will be padded with single value. Otherwise `fill` must have same length
        as image channels, which resulted in padding with per-channel values.
    keep_ratio : bool
        If `True`, will keep output image the same aspect ratio as input.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, new_width, new_height)

    Raises
    ------
    ValueError
        If `fill` is array-like and its length differs from the image channels.
    """
    if max_ratio <= 1:
        # No expansion possible: return input with a zero-offset full-size box.
        return src, (0, 0, src.shape[1], src.shape[0])

    h, w, c = src.shape
    ratio_x = random.uniform(1, max_ratio)
    if keep_ratio:
        ratio_y = ratio_x
    else:
        ratio_y = random.uniform(1, max_ratio)

    oh, ow = int(h * ratio_y), int(w * ratio_x)
    # Random placement of the original image inside the enlarged canvas.
    off_y = random.randint(0, oh - h)
    off_x = random.randint(0, ow - w)

    # make canvas
    if isinstance(fill, numeric_types):
        # BUGFIX: allocate the canvas on src's context (the per-channel branch
        # below already did), so src and dst always live on the same device.
        dst = nd.full(shape=(oh, ow, c), val=fill, ctx=src.context, dtype=src.dtype)
    else:
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        # Tile the (1, c) fill row over every pixel so channels stay interleaved.
        dst = nd.tile(fill.reshape((1, c)), reps=(oh * ow, 1)).reshape((oh, ow, c))

    dst[off_y:off_y+h, off_x:off_x+w, :] = src
    return dst, (off_x, off_y, ow, oh)
def random_flip(src, px=0, py=0, copy=False):
    """Randomly flip image along horizontal and vertical with probabilities.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image with HWC format.
    px : float
        Horizontal flip probability [0, 1].
    py : float
        Vertical flip probability [0, 1].
    copy : bool
        If `True`, return a copy of input

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (flip_x, flip_y), records of whether flips are applied.
    """
    # Draw the two flip decisions independently with the given probabilities.
    do_flip_y = np.random.choice([False, True], p=[1 - py, py])
    do_flip_x = np.random.choice([False, True], p=[1 - px, px])
    if do_flip_y:
        src = nd.flip(src, axis=0)   # vertical: flip rows
    if do_flip_x:
        src = nd.flip(src, axis=1)   # horizontal: flip columns
    result = src.copy() if copy else src
    return result, (do_flip_x, do_flip_y)
def resize_contain(src, size, fill=0):
    """Resize the image to fit in the given area while keeping aspect ratio.

    If both the height and the width in `size` are larger than
    the height and the width of input image, the image is placed on
    the center with an appropriate padding to match `size`.
    Otherwise, the input image is scaled to fit in a canvas whose size
    is `size` while preserving aspect ratio.

    Parameters
    ----------
    src : mxnet.nd.NDArray
        The original image with HWC format.
    size : tuple
        Tuple of length 2 as (width, height).
    fill : int or float or array-like
        The value(s) for padded borders. If `fill` is numerical type, RGB channels
        will be padded with single value. Otherwise `fill` must have same length
        as image channels, which resulted in padding with per-channel values.

    Returns
    -------
    mxnet.nd.NDArray
        Augmented image.
    tuple
        Tuple of (offset_x, offset_y, scaled_x, scaled_y)

    Raises
    ------
    ValueError
        If `fill` is array-like and its length differs from the image channels.
    """
    h, w, c = src.shape
    ow, oh = size
    scale_h = oh / h
    scale_w = ow / w
    # Never upscale: scale <= 1, so small images are only padded, not enlarged.
    scale = min(min(scale_h, scale_w), 1)
    scaled_x = int(w * scale)
    scaled_y = int(h * scale)
    if scale < 1:
        src = mx.image.imresize(src, scaled_x, scaled_y)

    # Center the (possibly resized) image on the canvas.
    off_y = (oh - scaled_y) // 2 if scaled_y < oh else 0
    off_x = (ow - scaled_x) // 2 if scaled_x < ow else 0

    # make canvas
    if isinstance(fill, numeric_types):
        dst = nd.full(shape=(oh, ow, c), val=fill, ctx=src.context, dtype=src.dtype)
    else:
        fill = nd.array(fill, dtype=src.dtype, ctx=src.context)
        if not c == fill.size:
            raise ValueError("Channel and fill size mismatch, {} vs {}".format(c, fill.size))
        # BUGFIX: the previous nd.repeat(fill, repeats=oh*ow) produced
        # [r..r, g..g, b..b], which reshaped to (oh, ow, c) scrambles channels.
        # Tile the (1, c) row per pixel instead, matching random_expand.
        dst = nd.tile(fill.reshape((1, c)), reps=(oh * ow, 1)).reshape((oh, ow, c))

    dst[off_y:off_y+scaled_y, off_x:off_x+scaled_x, :] = src
    return dst, (off_x, off_y, scaled_x, scaled_y)
def ten_crop(src, size):
    """Crop 10 regions from an array.

    This is performed same as:
    http://chainercv.readthedocs.io/en/stable/reference/transforms.html#ten-crop

    This method crops 10 regions. All regions will be in shape
    :obj`size`. These regions consist of 1 center crop and 4 corner
    crops and horizontal flips of them.
    The crops are ordered in this order.

    * center crop
    * top-left crop
    * bottom-left crop
    * top-right crop
    * bottom-right crop
    * center crop (flipped horizontally)
    * top-left crop (flipped horizontally)
    * bottom-left crop (flipped horizontally)
    * top-right crop (flipped horizontally)
    * bottom-right crop (flipped horizontally)

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image.
    size : tuple
        Tuple of length 2, as (width, height) of the cropped areas.

    Returns
    -------
    mxnet.nd.NDArray
        The cropped images with shape (10, size[1], size[0], C)
    """
    h, w, _ = src.shape
    ow, oh = size
    if h < oh or w < ow:
        raise ValueError(
            "Cannot crop area {} from image with size ({}, {})".format(str(size), h, w))

    # Top-left corners of the five crops; center uses floor((dim - crop) / 2).
    cy, cx = (h - oh) // 2, (w - ow) // 2
    center = src[cy:cy + oh, cx:cx + ow, :]
    tl = src[:oh, :ow, :]
    bl = src[h - oh:, :ow, :]
    tr = src[:oh, w - ow:, :]
    br = src[h - oh:, w - ow:, :]

    # Stack the five crops, then append their horizontal mirrors (axis 2 = width).
    crops = nd.stack(center, tl, bl, tr, br, axis=0)
    return nd.concat(crops, nd.flip(crops, axis=2), dim=0)