Source code for dltk.networks.segmentation.fcn

from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import tensorflow as tf

from dltk.core.residual_unit import vanilla_residual_unit_3d
from dltk.core.upsample import linear_upsample_3d


[docs]def upscore_layer_3d(inputs,
                     inputs2,
                     out_filters,
                     in_filters=None,
                     strides=(2, 2, 2),
                     mode=tf.estimator.ModeKeys.EVAL,
                     use_bias=False,
                     kernel_initializer=tf.initializers.variance_scaling(distribution='uniform'),
                     bias_initializer=tf.zeros_initializer(),
                     kernel_regularizer=None,
                     bias_regularizer=None):
    """Upscore layer according to [1].

    [1] J. Long et al. Fully convolutional networks for semantic segmentation.
    CVPR 2015.

    Args:
        inputs (tf.Tensor): Input features to be upscored.
        inputs2 (tf.Tensor): Higher resolution features from the encoder to add.
            out_filters (int): Number of output filters (typically, number of
            segmentation classes)
        in_filters (None, optional): None or number of input filters.
        strides (tuple, optional): Upsampling factor for a strided transpose
            convolution.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings: TRAIN,
            EVAL or PREDICT
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        kernel_initializer (TYPE, optional): An initializer for the convolution
            kernel.
        bias_initializer (TYPE, optional): An initializer for the bias vector.
            If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the bias
            vector.

    Returns:
        tf.Tensor: Upscore tensor

    """
    conv_params = {'use_bias': use_bias,
                   'kernel_initializer': kernel_initializer,
                   'bias_initializer': bias_initializer,
                   'kernel_regularizer': kernel_regularizer,
                   'bias_regularizer': bias_regularizer}

    # Compute an upsampling shape dynamically from the input tensor. Input
    # filters are required to be static.
    if in_filters is None:
        in_filters = inputs.get_shape().as_list()[-1]

    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'
    assert len(inputs.get_shape().as_list()) == len(inputs2.get_shape().as_list()), \
        'Ranks of input and input2 differ'

    # Account for differences in the number of input and output filters
    if in_filters != out_filters:
        x = tf.layers.conv3d(inputs=inputs,
                             filters=out_filters,
                             kernel_size=(1, 1, 1),
                             strides=(1, 1, 1),
                             padding='same',
                             name='filter_conversion',
                             **conv_params)
    else:
        x = inputs

    # Upsample inputs
    x = linear_upsample_3d(inputs=x, strides=strides)

    # Skip connection
    x2 = tf.layers.conv3d(inputs=inputs2,
                          filters=out_filters,
                          kernel_size=(1, 1, 1),
                          strides=(1, 1, 1),
                          padding='same',
                          **conv_params)

    x2 = tf.layers.batch_normalization(
        x2, training=mode == tf.estimator.ModeKeys.TRAIN)

    # Return the element-wise sum
    return tf.add(x, x2)


[docs]def residual_fcn_3d(inputs,
                    num_classes,
                    num_res_units=1,
                    filters=(16, 32, 64, 128),
                    strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
                    mode=tf.estimator.ModeKeys.EVAL,
                    use_bias=False,
                    activation=tf.nn.relu6,
                    kernel_initializer=tf.initializers.variance_scaling(distribution='uniform'),
                    bias_initializer=tf.zeros_initializer(),
                    kernel_regularizer=None,
                    bias_regularizer=None):
    """
    Image segmentation network based on an FCN architecture [1] using
    residual units [2] as feature extractors. Downsampling and upsampling
    of features is done via strided convolutions and transpose convolutions,
    respectively. On each resolution scale s are num_residual_units with
    filter size = filters[s]. strides[s] determine the downsampling factor
    at each resolution scale.

    [1] J. Long et al. Fully convolutional networks for semantic segmentation.
        CVPR 2015.
    [2] K. He et al. Identity Mappings in Deep Residual Networks. ECCV 2016.

    Args:
        inputs (tf.Tensor): Input feature tensor to the network (rank 5
            required).
        num_classes (int): Number of output classes.
        num_res_units (int, optional): Number of residual units at each
            resolution scale.
        filters (tuple, optional): Number of filters for all residual units at
            each resolution scale.
        strides (tuple, optional): Stride of the first unit on a resolution
            scale.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings:
            TRAIN, EVAL or PREDICT
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        activation (optional): A function to use as activation function.
        kernel_initializer (TYPE, optional): An initializer for the convolution
            kernel.
        bias_initializer (TYPE, optional): An initializer for the bias vector.
            If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the bias
            vector.

    Returns:
        dict: dictionary of output tensors
    """
    outputs = {}
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'

    conv_params = {'use_bias': use_bias,
                   'kernel_initializer': kernel_initializer,
                   'bias_initializer': bias_initializer,
                   'kernel_regularizer': kernel_regularizer,
                   'bias_regularizer': bias_regularizer}

    x = inputs

    # Inital convolution with filters[0]
    x = tf.layers.conv3d(inputs=x,
                         filters=filters[0],
                         kernel_size=(3, 3, 3),
                         strides=strides[0],
                         padding='same',
                         **conv_params)

    tf.logging.info('Init conv tensor shape {}'.format(x.get_shape()))

    # Residual feature encoding blocks with num_res_units at different
    # resolution scales res_scales
    res_scales = [x]
    saved_strides = []
    for res_scale in range(1, len(filters)):

        # Features are downsampled via strided convolutions. These are defined
        # in `strides` and subsequently saved
        with tf.variable_scope('unit_{}_0'.format(res_scale)):

            x = vanilla_residual_unit_3d(
                inputs=x,
                out_filters=filters[res_scale],
                strides=strides[res_scale],
                activation=activation,
                mode=mode)
        saved_strides.append(strides[res_scale])

        for i in range(1, num_res_units):

            with tf.variable_scope('unit_{}_{}'.format(res_scale, i)):

                x = vanilla_residual_unit_3d(
                    inputs=x,
                    out_filters=filters[res_scale],
                    strides=(1, 1, 1),
                    activation=activation,
                    mode=mode)
        res_scales.append(x)

        tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
            res_scale, x.get_shape()))

    # Upscore layers [2] reconstruct the predictions to higher resolution
    # scales
    for res_scale in range(len(filters) - 2, -1, -1):

        with tf.variable_scope('upscore_{}'.format(res_scale)):

            x = upscore_layer_3d(
                inputs=x,
                inputs2=res_scales[res_scale],
                out_filters=num_classes,
                strides=saved_strides[res_scale],
                mode=mode,
                **conv_params)

        tf.logging.info('Decoder at res_scale {} tensor shape: {}'.format(
            res_scale, x.get_shape()))

    # Last convolution
    with tf.variable_scope('last'):
        x = tf.layers.conv3d(inputs=x,
                             filters=num_classes,
                             kernel_size=(1, 1, 1),
                             strides=(1, 1, 1),
                             padding='same',
                             **conv_params)

    tf.logging.info('Output tensor shape {}'.format(x.get_shape()))

    # Define the outputs
    outputs['logits'] = x

    with tf.variable_scope('pred'):

        y_prob = tf.nn.softmax(x)
        outputs['y_prob'] = y_prob

        y_ = tf.argmax(x, axis=-1) \
            if num_classes > 1 \
            else tf.cast(tf.greater_equal(x[..., 0], 0.5), tf.int32)

        outputs['y_'] = y_

    return outputs