# Source code for gluoncv.nn.feature

# pylint: disable=abstract-method
"""Feature extraction blocks.
Feature or Multi-Feature extraction is a key component in object detection.
Class predictor/Box predictor are usually applied on feature layer(s).
A good feature extraction mechanism is critical to performance.
"""
from __future__ import absolute_import

import mxnet as mx
from mxnet.base import string_types
from mxnet.gluon import HybridBlock, SymbolBlock
from mxnet.symbol import Symbol
from mxnet.symbol.contrib import SyncBatchNorm


def _parse_network(network, outputs, inputs, pretrained, ctx, **kwargs):
    """Parse network with specified outputs and other arguments.

    Parameters
    ----------
    network : str or HybridBlock or Symbol
        Logic chain: load from gluoncv.model_zoo if network is string.
        Convert to Symbol if network is HybridBlock.
    outputs : str or iterable of str
        The name of layers to be extracted as features.
    inputs : iterable of str or iterable of Symbol
        The name of input datas.
    pretrained : bool
        Use pretrained parameters as in gluon.model_zoo
    ctx : Context
        The context, e.g. mxnet.cpu(), mxnet.gpu(0).

    Returns
    -------
    inputs : list of Symbol
        Network input Symbols, usually ['data']
    outputs : list of Symbol
        Network output Symbols, usually as features
    params : ParameterDict
        Network parameters.
    """
    # Copy unconditionally: the loop below rebinds elements, and we must not
    # mutate a list the caller still owns (previously only tuples were copied,
    # so a caller-supplied list was silently modified in place).
    inputs = list(inputs)
    for i, inp in enumerate(inputs):
        if isinstance(inp, string_types):
            inputs[i] = mx.sym.var(inp)
        assert isinstance(inputs[i], Symbol), "Network expects inputs are Symbols."
    # A single input stays a plain Symbol; multiple inputs are grouped.
    if len(inputs) == 1:
        inputs = inputs[0]
    else:
        inputs = mx.sym.Group(inputs)
    params = None
    prefix = ''
    if isinstance(network, string_types):
        from ..model_zoo import get_model
        network = get_model(network, pretrained=pretrained, ctx=ctx, **kwargs)
    if isinstance(network, HybridBlock):
        # Capture parameters and name prefix before the block is traced
        # into a Symbol by calling it on the input symbol(s).
        params = network.collect_params()
        prefix = network._prefix
        network = network(inputs)
    assert isinstance(network, Symbol), \
        "FeatureExtractor requires the network argument to be either " \
        "str, HybridBlock or Symbol, but got %s" % type(network)

    if isinstance(outputs, string_types):
        outputs = [outputs]
    assert len(outputs) > 0, "At least one outputs must be specified."
    # Gluon layer outputs are exposed in the symbol graph as '<name>_output'.
    outputs = [out if out.endswith('_output') else out + '_output' for out in outputs]
    outputs = [network.get_internals()[prefix + out] for out in outputs]
    return inputs, outputs, params


class FeatureExtractor(SymbolBlock):
    """Feature extractor.

    Parameters
    ----------
    network : str or HybridBlock or Symbol
        Logic chain: load from gluoncv.model_zoo if network is string.
        Convert to Symbol if network is HybridBlock
    outputs : str or list of str
        The name of layers to be extracted as features
    inputs : list of str or list of Symbol
        The inputs of network.
    pretrained : bool
        Use pretrained parameters as in gluon.model_zoo
    ctx : Context
        The context, e.g. mxnet.cpu(), mxnet.gpu(0).
    """

    def __init__(self, network, outputs, inputs=('data',), pretrained=False,
                 ctx=mx.cpu(), **kwargs):
        # Resolve the backbone into input/output symbols plus its parameters,
        # then hand them to SymbolBlock which builds the extractor graph.
        parsed_inputs, parsed_outputs, params = _parse_network(
            network, outputs, inputs, pretrained, ctx, **kwargs)
        super(FeatureExtractor, self).__init__(
            parsed_outputs, parsed_inputs, params=params)
class FeatureExpander(SymbolBlock):
    """Feature extractor with additional layers to append.

    This is very common in vision networks where extra branches are attached to
    backbone network.

    Parameters
    ----------
    network : str or HybridBlock or Symbol
        Logic chain: load from gluoncv.model_zoo if network is string.
        Convert to Symbol if network is HybridBlock.
    outputs : str or list of str
        The name of layers to be extracted as features
    num_filters : list of int
        Number of filters to be appended.
    use_1x1_transition : bool
        Whether to use 1x1 convolution between attached layers. It is effective
        reducing network size.
    use_bn : bool
        Whether to use BatchNorm between attached layers.
    reduce_ratio : float
        Channel reduction ratio of the transition layers.
    min_depth : int
        Minimum channel number of transition layers.
    global_pool : bool
        Whether to use global pooling as the last layer.
    pretrained : bool
        Use pretrained parameters as in gluon.model_zoo if `True`.
    ctx : Context
        The context, e.g. mxnet.cpu(), mxnet.gpu(0).
    inputs : list of str
        Name of input variables to the network.
    """

    def __init__(self, network, outputs, num_filters, use_1x1_transition=True,
                 use_bn=True, reduce_ratio=1.0, min_depth=128, global_pool=False,
                 pretrained=False, ctx=mx.cpu(), inputs=('data',), **kwargs):
        inputs, outputs, params = _parse_network(
            network, outputs, inputs, pretrained, ctx, **kwargs)
        # Attach extra stride-2 branches after the deepest backbone output,
        # each producing one additional (smaller) feature map.
        weight_init = mx.init.Xavier(rnd_type='gaussian', factor_type='out',
                                     magnitude=2)
        y = outputs[-1]
        for idx, channels in enumerate(num_filters):
            if use_1x1_transition:
                # Optional channel-reducing 1x1 conv ahead of the 3x3 conv.
                trans_channels = max(min_depth, int(round(channels * reduce_ratio)))
                y = mx.sym.Convolution(
                    y, num_filter=trans_channels, kernel=(1, 1), no_bias=use_bn,
                    name='expand_trans_conv{}'.format(idx),
                    attr={'__init__': weight_init})
                if use_bn:
                    y = mx.sym.BatchNorm(y, name='expand_trans_bn{}'.format(idx))
                y = mx.sym.Activation(
                    y, act_type='relu', name='expand_trans_relu{}'.format(idx))
            y = mx.sym.Convolution(
                y, num_filter=channels, kernel=(3, 3), pad=(1, 1), stride=(2, 2),
                no_bias=use_bn, name='expand_conv{}'.format(idx),
                attr={'__init__': weight_init})
            if use_bn:
                y = mx.sym.BatchNorm(y, name='expand_bn{}'.format(idx))
            # NOTE(review): 'expand_reu' (sic) is kept as-is on purpose —
            # renaming it would change symbol/parameter names and break
            # loading of previously saved weights.
            y = mx.sym.Activation(y, act_type='relu',
                                  name='expand_reu{}'.format(idx))
            outputs.append(y)
        if global_pool:
            outputs.append(mx.sym.Pooling(y, pool_type='avg', global_pool=True,
                                          kernel=(1, 1)))
        super(FeatureExpander, self).__init__(outputs, inputs, params)
class FPNFeatureExpander(SymbolBlock):
    """Feature extractor with additional layers to append.

    This is specified for ``Feature Pyramid Network for Object Detection``
    which implement ``Top-down pathway and lateral connections``.

    Parameters
    ----------
    network : str or HybridBlock or Symbol
        Logic chain: load from gluon.model_zoo.vision if network is string.
        Convert to Symbol if network is HybridBlock.
    outputs : str or list of str
        The name of layers to be extracted as features
    num_filters : list of int e.g. [256, 256, 256, 256]
        Number of filters to be appended.
    use_1x1 : bool
        Whether to use 1x1 convolution
    use_upsample : bool
        Whether to use upsample
    use_elewadd : float
        Whether to use element-wise add operation
    use_p6 : bool
        Whether use P6 stage, this is used for RPN experiments in ori paper
    p6_conv : bool
        Whether to use convolution for P6 stage, if it is enabled, or just max pooling.
    no_bias : bool
        Whether use bias for Convolution operation.
    norm_layer : HybridBlock or SymbolBlock
        Type of normalization layer.
    norm_kwargs : dict
        Arguments for normalization layer.
    pretrained : bool
        Use pretrained parameters as in gluon.model_zoo if `True`.
    ctx : Context
        The context, e.g. mxnet.cpu(), mxnet.gpu(0).
    inputs : list of str
        Name of input variables to the network.
    """

    def __init__(self, network, outputs, num_filters, use_1x1=True, use_upsample=True,
                 use_elewadd=True, use_p6=False, p6_conv=True, no_bias=True, pretrained=False,
                 norm_layer=None, norm_kwargs=None, ctx=mx.cpu(), inputs=('data',)):
        inputs, outputs, params = _parse_network(network, outputs, inputs, pretrained, ctx)
        if norm_kwargs is None:
            norm_kwargs = {}
        # NOTE(review): when the caller passes a norm_kwargs dict and
        # norm_layer is SyncBatchNorm, that dict is mutated in place below
        # ('key'/'name' entries are overwritten each iteration).
        # e.g. For ResNet50, the feature is :
        # outputs = ['stage1_activation2', 'stage2_activation3',
        #            'stage3_activation5', 'stage4_activation2']
        # with regard to [conv2, conv3, conv4, conv5] -> [C2, C3, C4, C5]
        # append more layers with reversed order : [P5, P4, P3, P2]
        y = outputs[-1]
        base_features = outputs[::-1]
        num_stages = len(num_filters) + 1  # usually 5
        weight_init = mx.init.Xavier(rnd_type='uniform', factor_type='in', magnitude=1.)
        tmp_outputs = []
        # num_filter is 256 in ori paper
        for i, (bf, f) in enumerate(zip(base_features, num_filters)):
            if i == 0:
                # Topmost stage (P5): only a lateral 1x1 conv, no merge needed.
                if use_1x1:
                    y = mx.sym.Convolution(y, num_filter=f, kernel=(1, 1), pad=(0, 0),
                                           stride=(1, 1), no_bias=no_bias,
                                           name="P{}_conv_lat".format(num_stages - i),
                                           attr={'__init__': weight_init})
                    if norm_layer is not None:
                        if norm_layer is SyncBatchNorm:
                            norm_kwargs['key'] = "P{}_lat_bn".format(num_stages - i)
                            norm_kwargs['name'] = "P{}_lat_bn".format(num_stages - i)
                        y = norm_layer(y, **norm_kwargs)
                if use_p6 and p6_conv:
                    # method 2 : use conv (Deformable use this)
                    y_p6 = mx.sym.Convolution(y, num_filter=f, kernel=(3, 3), pad=(1, 1),
                                              stride=(2, 2), no_bias=no_bias,
                                              name='P{}_conv1'.format(num_stages + 1),
                                              attr={'__init__': weight_init})
                    if norm_layer is not None:
                        if norm_layer is SyncBatchNorm:
                            norm_kwargs['key'] = "P{}_pre_bn".format(num_stages + 1)
                            norm_kwargs['name'] = "P{}_pre_bn".format(num_stages + 1)
                        y_p6 = norm_layer(y_p6, **norm_kwargs)
            else:
                # Lower stages: lateral conv on the backbone feature, then
                # merge with the upsampled top-down path.
                if use_1x1:
                    bf = mx.sym.Convolution(bf, num_filter=f, kernel=(1, 1), pad=(0, 0),
                                            stride=(1, 1), no_bias=no_bias,
                                            name="P{}_conv_lat".format(num_stages - i),
                                            attr={'__init__': weight_init})
                    if norm_layer is not None:
                        if norm_layer is SyncBatchNorm:
                            norm_kwargs['key'] = "P{}_conv1_bn".format(num_stages - i)
                            norm_kwargs['name'] = "P{}_conv1_bn".format(num_stages - i)
                        bf = norm_layer(bf, **norm_kwargs)
                if use_upsample:
                    y = mx.sym.UpSampling(y, scale=2, sample_type='nearest',
                                          name="P{}_upsp".format(num_stages - i))
                if use_elewadd:
                    # make two symbol alignment
                    # method 1 : mx.sym.Crop
                    # y = mx.sym.Crop(*[y, bf], name="P{}_clip".format(num_stages-i))
                    # method 2 : mx.sym.slice_like
                    y = mx.sym.slice_like(y, bf, axes=(2, 3),
                                          name="P{}_clip".format(num_stages - i))
                    y = mx.sym.ElementWiseSum(bf, y, name="P{}_sum".format(num_stages - i))
            # Reduce the aliasing effect of upsampling described in ori paper
            out = mx.sym.Convolution(y, num_filter=f, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                                     no_bias=no_bias, name='P{}_conv1'.format(num_stages - i),
                                     attr={'__init__': weight_init})
            if i == 0 and use_p6 and not p6_conv:
                # method 2 : use max pool (Detectron use this)
                y_p6 = mx.sym.Pooling(out, pool_type='max', kernel=(1, 1), pad=(0, 0),
                                      stride=(2, 2), name="P{}_pre".format(num_stages + 1))
            if norm_layer is not None:
                if norm_layer is SyncBatchNorm:
                    norm_kwargs['key'] = "P{}_bn".format(num_stages - i)
                    norm_kwargs['name'] = "P{}_bn".format(num_stages - i)
                out = norm_layer(out, **norm_kwargs)
            tmp_outputs.append(out)
        # Stages were built top-down; reverse back to bottom-up ordering.
        if use_p6:
            outputs = tmp_outputs[::-1] + [y_p6]  # [P2, P3, P4, P5] + [P6]
        else:
            outputs = tmp_outputs[::-1]  # [P2, P3, P4, P5]
        super(FPNFeatureExpander, self).__init__(outputs, inputs, params)