# Source code for gluoncv.model_zoo.monodepthv2.resnet_encoder

"""Encoder module of Monodepth2
Code partially borrowed from
https://github.com/nianticlabs/monodepth2/blob/master/networks/resnet_encoder.py
"""
from __future__ import absolute_import, division, print_function

import os
import numpy as np
import mxnet as mx

from mxnet.gluon import nn
from mxnet.context import cpu
from ...model_zoo.resnetv1b import \
    resnet18_v1b, resnet34_v1b, resnet50_v1s, resnet101_v1s, resnet152_v1s


class ResnetEncoder(nn.HybridBlock):
    r"""Encoder of Monodepth2

    Parameters
    ----------
    backbone : string
        Pre-trained dilated backbone network type ('resnet18', 'resnet34', 'resnet50',
        'resnet101' or 'resnet152').
    pretrained : bool or str
        Refers to if the backbone is pretrained or not. If `True`,
        model weights of a model that was trained on ImageNet is loaded.
        The value is also forwarded as the ``tag`` argument of ``get_model_file``
        when the multi-image weights have to be generated.
    num_input_images : int
        The number of input sequences. 1 for depth encoder, larger than 1 for pose encoder.
        (Default: 1)
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters. A leading ``~`` is expanded
        before the multi-image parameter file is looked up or written.
    ctx : Context, default ``cpu()``
        Context handed to the backbone constructor and to ``load_parameters``.
    **kwargs
        Extra keyword arguments forwarded unchanged to the backbone constructor.

    Attributes
    ----------
    num_ch_enc : numpy.ndarray
        ``[64, 64, 128, 256, 512]``; every entry after the first is multiplied
        by 4 for backbones other than 'resnet18' and 'resnet34'.
    encoder : HybridBlock
        The backbone network returned by the selected constructor.
    features : list
        The five feature maps produced by the most recent call to
        ``hybrid_forward`` or ``predict``.

    Raises
    ------
    ValueError
        If ``backbone`` is not one of the supported names.
    """

    def __init__(self, backbone, pretrained, num_input_images=1,
                 root=os.path.join(os.path.expanduser('~'), '.mxnet/models'),
                 ctx=cpu(), **kwargs):
        super(ResnetEncoder, self).__init__()

        self.num_ch_enc = np.array([64, 64, 128, 256, 512])

        resnets = {'resnet18': resnet18_v1b,
                   'resnet34': resnet34_v1b,
                   'resnet50': resnet50_v1s,
                   'resnet101': resnet101_v1s,
                   'resnet152': resnet152_v1s}

        num_layers = {'resnet18': 18,
                      'resnet34': 34,
                      'resnet50': 50,
                      'resnet101': 101,
                      'resnet152': 152}

        if backbone not in resnets:
            raise ValueError("{} is not a valid resnet".format(backbone))

        if num_input_images > 1:
            # Build the backbone without weights first; the stock checkpoint
            # has to be adapted (below) before it can be loaded.
            self.encoder = resnets[backbone](pretrained=False, ctx=ctx, **kwargs)
            if pretrained:
                # Expand '~' so that a root given in the documented
                # '~/.mxnet/models' form resolves to the user's home directory
                # instead of a literal '~' directory relative to the CWD.
                root = os.path.expanduser(root)
                # NOTE(review): the checkpoint name always uses the '_v1b'
                # suffix, even for backbones built with the '*_v1s'
                # constructors above -- confirm the parameters are compatible.
                filename = os.path.join(
                    root, 'resnet%d_v%db_multiple_inputs.params' % (num_layers[backbone], 1))
                if not os.path.isfile(filename):
                    # Generate the multi-image checkpoint once and cache it on disk.
                    from ..model_store import get_model_file
                    loaded = mx.nd.load(get_model_file('resnet%d_v%db' % (num_layers[backbone], 1),
                                                       tag=pretrained, root=root))
                    # Repeat the first conv weights once per input image along
                    # axis 1 (presumably the input-channel axis) and rescale
                    # by 1 / num_input_images.
                    loaded['conv1.weight'] = mx.nd.concat(
                        *([loaded['conv1.weight']] * num_input_images), dim=1) / num_input_images
                    mx.nd.save(filename, loaded)
                self.encoder.load_parameters(filename, ctx=ctx)
                # Attach the ImageNet class metadata to the manually loaded backbone.
                from ...data import ImageNet1kAttr
                attrib = ImageNet1kAttr()
                self.encoder.synset = attrib.synset
                self.encoder.classes = attrib.classes
                self.encoder.classes_long = attrib.classes_long
        else:
            self.encoder = resnets[backbone](pretrained=pretrained, ctx=ctx, **kwargs)

        if backbone not in ('resnet18', 'resnet34'):
            self.num_ch_enc[1:] *= 4

    def _extract_features(self, input_image):
        """Run the backbone stage by stage and collect the five feature maps."""
        # Shift and scale the input with the fixed constants 0.45 and 0.225.
        x = (input_image - 0.45) / 0.225
        x = self.encoder.conv1(x)
        x = self.encoder.bn1(x)
        features = [self.encoder.relu(x)]
        # Only the first stage is preceded by the max-pooling layer.
        features.append(self.encoder.layer1(self.encoder.maxpool(features[-1])))
        features.append(self.encoder.layer2(features[-1]))
        features.append(self.encoder.layer3(features[-1]))
        features.append(self.encoder.layer4(features[-1]))
        return features

    def hybrid_forward(self, F, input_image):
        # pylint: disable=unused-argument
        """Compute the encoder feature maps for ``input_image``.

        Parameters
        ----------
        F : module
            Unused; accepted to match the ``hybrid_forward`` signature.
        input_image : NDArray or Symbol
            Input passed through the normalisation and the backbone.

        Returns
        -------
        list
            Outputs of relu, layer1, layer2, layer3 and layer4, in that order.
            The same list is stored in ``self.features``.
        """
        self.features = self._extract_features(input_image)
        return self.features

    def predict(self, input_image):
        """Compute the encoder feature maps by calling the backbone layers directly.

        Performs the same computation as ``hybrid_forward`` and likewise stores
        the returned list in ``self.features``.
        """
        self.features = self._extract_features(input_image)
        return self.features