Source code for dltk.networks.regression_classification.resnet

from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import tensorflow as tf

from dltk.core.units.residual_unit import vanilla_residual_unit_3d


[docs]def resnet_3d(inputs,
              num_classes,
              num_res_units=1,
              filters=(16, 32, 64, 128),
              strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
              mode=tf.estimator.ModeKeys.EVAL,
              use_bias=False,
              activation=tf.nn.relu6,
              kernel_initializer=tf.initializers.variance_scaling(distribution='uniform'),
              bias_initializer=tf.zeros_initializer(),
              kernel_regularizer=None, bias_regularizer=None):
    """
    Regression/classification network based on a flexible resnet
    architecture [1] using residual units proposed in [2]. The downsampling
    of features is done via strided convolutions. On each resolution scale s
    are num_convolutions with filter size = filters[s]. strides[s]
    determine the downsampling factor at each resolution scale.

    [1] K. He et al. Deep residual learning for image recognition. CVPR 2016.
    [2] K. He et al. Identity Mappings in Deep Residual Networks. ECCV 2016.

    Args:
        inputs (tf.Tensor): Input feature tensor to the network (rank 5
            required).
        num_classes (int): Number of output channels or classes.
        num_res_units (int, optional): Number of residual units per resolution
            scale.
        filters (tuple, optional): Number of filters for all residual units at
            each resolution scale.
        strides (tuple, optional): Stride of the first unit on a resolution
            scale.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings: TRAIN,
            EVAL or PREDICT
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        activation (optional): A function to use as activation function.
        kernel_initializer (TYPE, optional): An initializer for the convolution
            kernel.
        bias_initializer (TYPE, optional): An initializer for the bias vector.
            If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the bias
            vector.

    Returns:
        dict: dictionary of output tensors

    """
    outputs = {}
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'

    relu_op = tf.nn.relu6

    conv_params = {'padding': 'same',
                   'use_bias': use_bias,
                   'kernel_initializer': kernel_initializer,
                   'bias_initializer': bias_initializer,
                   'kernel_regularizer': kernel_regularizer,
                   'bias_regularizer': bias_regularizer}

    x = inputs

    # Inital convolution with filters[0]
    k = [s * 2 if s > 1 else 3 for s in strides[0]]
    x = tf.layers.conv3d(x, filters[0], k, strides[0], **conv_params)
    tf.logging.info('Init conv tensor shape {}'.format(x.get_shape()))

    # Residual feature encoding blocks with num_res_units at different
    # resolution scales res_scales
    res_scales = [x]
    saved_strides = []
    for res_scale in range(1, len(filters)):

        # Features are downsampled via strided convolutions. These are defined
        # in `strides` and subsequently saved
        with tf.variable_scope('unit_{}_0'.format(res_scale)):

            x = vanilla_residual_unit_3d(
                inputs=x,
                out_filters=filters[res_scale],
                strides=strides[res_scale],
                activation=activation,
                mode=mode)
        saved_strides.append(strides[res_scale])

        for i in range(1, num_res_units):

            with tf.variable_scope('unit_{}_{}'.format(res_scale, i)):

                x = vanilla_residual_unit_3d(
                    inputs=x,
                    out_filters=filters[res_scale],
                    strides=(1, 1, 1),
                    activation=activation,
                    mode=mode)
        res_scales.append(x)
        tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
            res_scale, x.get_shape()))

    # Global pool and last unit
    with tf.variable_scope('pool'):
        x = tf.layers.batch_normalization(
            x, training=mode == tf.estimator.ModeKeys.TRAIN)
        x = relu_op(x)

        axis = tuple(range(len(x.get_shape().as_list())))[1:-1]
        x = tf.reduce_mean(x, axis=axis, name='global_avg_pool')

        tf.logging.info('Global pool shape {}'.format(x.get_shape()))

    with tf.variable_scope('last'):
        x = tf.layers.dense(inputs=x,
                            units=num_classes,
                            activation=None,
                            use_bias=conv_params['use_bias'],
                            kernel_initializer=conv_params['kernel_initializer'],
                            bias_initializer=conv_params['bias_initializer'],
                            kernel_regularizer=conv_params['kernel_regularizer'],
                            bias_regularizer=conv_params['bias_regularizer'],
                            name='hidden_units')

        tf.logging.info('Output tensor shape {}'.format(x.get_shape()))

    # Define the outputs
    outputs['logits'] = x

    with tf.variable_scope('pred'):

        y_prob = tf.nn.softmax(x)
        outputs['y_prob'] = y_prob

        y_ = tf.argmax(x, axis=-1) \
            if num_classes > 1 \
            else tf.cast(tf.greater_equal(x[..., 0], 0.5), tf.int32)
        outputs['y_'] = y_

    return outputs