# Source code for gluoncv.model_zoo.ssd.vgg_atrous

# pylint: disable=arguments-differ
"""VGG atrous network for object detection."""
from __future__ import division
import os
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.initializer import Xavier

__all__ = ['VGGAtrousExtractor', 'get_vgg_atrous_extractor', 'vgg16_atrous_300',
           'vgg16_atrous_512']


class Normalize(gluon.HybridBlock):
    """Channel-wise L2 normalization with a learnable per-channel rescaling,
    as described in the SSD paper (https://arxiv.org/abs/1512.02325).

    Parameters
    ----------
    n_channel : int
        Number of channels of input.
    initial : float
        Initial value for the rescaling factor.
    eps : float
        Small value to avoid division by zero.

    """
    def __init__(self, n_channel, initial=1, eps=1e-5):
        super(Normalize, self).__init__()
        self.eps = eps
        with self.name_scope():
            # One learnable scale per channel, shaped for broadcasting over
            # the batch and spatial dimensions.
            self.scale = self.params.get(
                'normalize_scale', shape=(1, n_channel, 1, 1),
                init=mx.init.Constant(initial))

    def hybrid_forward(self, F, x, scale):
        # L2-normalize along the channel axis, then rescale every channel.
        normalized = F.L2Normalization(x, mode='channel', eps=self.eps)
        return F.broadcast_mul(normalized, scale)


class VGGAtrousBase(gluon.HybridBlock):
    """VGG Atrous multi layer base network. You must inherit from it to define
    how the features are computed.

    Parameters
    ----------
    layers : list of int
        Number of layer for vgg base network.
    filters : list of int
        Number of convolution filters for each layer.
    batch_norm : bool, default is False
        If `True`, will use BatchNorm layers.

    """
    def __init__(self, layers, filters, batch_norm=False, **kwargs):
        super(VGGAtrousBase, self).__init__(**kwargs)
        assert len(layers) == len(filters)
        # Shared initializer settings applied to every convolution below.
        self.init = {
            'weight_initializer': Xavier(
                rnd_type='gaussian', factor_type='out', magnitude=2),
            'bias_initializer': 'zeros'
        }
        with self.name_scope():
            # we use pre-trained weights from caffe, initial scale must change
            init_scale = mx.nd.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1)) * 255
            self.init_scale = self.params.get_constant('init_scale', init_scale)

            # Plain VGG stages: each is `num_convs` 3x3 convolutions (optionally
            # with BatchNorm) followed by ReLU activations.
            self.stages = nn.HybridSequential()
            for num_convs, channels in zip(layers, filters):
                stage = nn.HybridSequential(prefix='')
                with stage.name_scope():
                    for _ in range(num_convs):
                        stage.add(nn.Conv2D(channels, kernel_size=3, padding=1, **self.init))
                        if batch_norm:
                            stage.add(nn.BatchNorm())
                        stage.add(nn.Activation('relu'))
                self.stages.add(stage)

            # use dilated convolution instead of dense layers
            stage = nn.HybridSequential(prefix='dilated_')
            with stage.name_scope():
                stage.add(nn.Conv2D(1024, kernel_size=3, padding=6, dilation=6, **self.init))
                if batch_norm:
                    stage.add(nn.BatchNorm())
                stage.add(nn.Activation('relu'))
                stage.add(nn.Conv2D(1024, kernel_size=1, **self.init))
                if batch_norm:
                    stage.add(nn.BatchNorm())
                stage.add(nn.Activation('relu'))
            self.stages.add(stage)

            # normalize layer for 4-th stage
            self.norm4 = Normalize(filters[3], 20)

    def hybrid_forward(self, F, x, init_scale):
        # Subclasses define how the stages are combined into feature maps.
        raise NotImplementedError

class VGGAtrousExtractor(VGGAtrousBase):
    """VGG Atrous multi layer feature extractor which produces multiple output
    feature maps.

    Parameters
    ----------
    layers : list of int
        Number of layer for vgg base network.
    filters : list of int
        Number of convolution filters for each layer.
    extras : list of list
        Extra layers configurations.
    batch_norm : bool
        If `True`, will use BatchNorm layers.

    """
    def __init__(self, layers, filters, extras, batch_norm=False, **kwargs):
        super(VGGAtrousExtractor, self).__init__(layers, filters, batch_norm, **kwargs)
        with self.name_scope():
            # Extra down-sampling stages appended after the VGG body; each
            # config entry is a (filters, kernel, stride, padding) tuple.
            self.extras = nn.HybridSequential()
            for i, config in enumerate(extras):
                extra = nn.HybridSequential(prefix='extra%d_'%(i))
                with extra.name_scope():
                    for channels, kernel, stride, pad in config:
                        extra.add(nn.Conv2D(channels, kernel, stride, pad, **self.init))
                        if batch_norm:
                            extra.add(nn.BatchNorm())
                        extra.add(nn.Activation('relu'))
                self.extras.add(extra)

    def hybrid_forward(self, F, x, init_scale):
        # Undo the caffe-style input scaling baked into the pretrained weights.
        x = F.broadcast_mul(x, init_scale)
        assert len(self.stages) == 6
        outputs = []
        # Stages 0-2: conv stage followed by 2x2/stride-2 max pooling.
        for stage in self.stages[:3]:
            x = stage(x)
            x = F.Pooling(x, pool_type='max', kernel=(2, 2), stride=(2, 2),
                          pooling_convention='full')
        # Stage 3 output is L2-normalized and kept as the first feature map.
        x = self.stages[3](x)
        norm = self.norm4(x)
        outputs.append(norm)
        x = F.Pooling(x, pool_type='max', kernel=(2, 2), stride=(2, 2),
                      pooling_convention='full')
        x = self.stages[4](x)
        # 3x3/stride-1 pooling keeps resolution before the dilated stage.
        x = F.Pooling(x, pool_type='max', kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                      pooling_convention='full')
        x = self.stages[5](x)
        outputs.append(x)
        # Every extra stage contributes one more (smaller) feature map.
        for extra in self.extras:
            x = extra(x)
            outputs.append(x)
        return outputs
# Per-depth VGG configuration: (convs per stage, channels per stage).
vgg_spec = {
    11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]),
    13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]),
    16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]),
    19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512]),
}

# Extra-stage configurations per detection input size; each inner tuple is
# (filters, kernel, stride, padding) for one convolution.
extra_spec = {
    300: [((256, 1, 1, 0), (512, 3, 2, 1)),
          ((128, 1, 1, 0), (256, 3, 2, 1)),
          ((128, 1, 1, 0), (256, 3, 1, 0)),
          ((128, 1, 1, 0), (256, 3, 1, 0))],
    512: [((256, 1, 1, 0), (512, 3, 2, 1)),
          ((128, 1, 1, 0), (256, 3, 2, 1)),
          ((128, 1, 1, 0), (256, 3, 2, 1)),
          ((128, 1, 1, 0), (256, 3, 2, 1)),
          ((128, 1, 1, 0), (256, 4, 1, 1))],
}
def get_vgg_atrous_extractor(num_layers, im_size, pretrained=False, ctx=mx.cpu(),
                             root=os.path.join('~', '.mxnet', 'models'), **kwargs):
    """Get VGG atrous feature extractor networks.

    Parameters
    ----------
    num_layers : int
        VGG types, can be 11,13,16,19.
    im_size : int
        VGG detection input size, can be 300, 512.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    ctx : mx.Context
        Context such as mx.cpu(), mx.gpu(0).
    root : str
        Model weights storing path.

    Returns
    -------
    mxnet.gluon.HybridBlock
        The returned network.

    """
    layers, filters = vgg_spec[num_layers]
    extras = extra_spec[im_size]
    net = VGGAtrousExtractor(layers, filters, extras, **kwargs)
    if pretrained:
        from ..model_store import get_model_file
        # Pretrained files for batch-norm variants carry a '_bn' suffix.
        batch_norm_suffix = '_bn' if kwargs.get('batch_norm') else ''
        net.initialize(ctx=ctx)
        # allow_missing: the extra SSD layers are not part of the stored
        # classification weights and stay randomly initialized.
        net.load_parameters(
            get_model_file('vgg%d_atrous%s'%(num_layers, batch_norm_suffix),
                           tag=pretrained, root=root),
            ctx=ctx, allow_missing=True)
    return net
def vgg16_atrous_300(**kwargs):
    """Get VGG atrous 16 layer 300 in_size feature extractor networks."""
    # Convenience wrapper around the generic factory for the 300-input model.
    return get_vgg_atrous_extractor(16, 300, **kwargs)
def vgg16_atrous_512(**kwargs):
    """Get VGG atrous 16 layer 512 in_size feature extractor networks."""
    # Convenience wrapper around the generic factory for the 512-input model.
    return get_vgg_atrous_extractor(16, 512, **kwargs)