Source code for dltk.networks.autoencoder.convolutional_autoencoder

from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import tensorflow as tf
import numpy as np


[docs]def convolutional_autoencoder_3d(inputs, num_convolutions=1,
                                 num_hidden_units=128, filters=(16, 32, 64),
                                 strides=((2, 2, 2), (2, 2, 2), (2, 2, 2)),
                                 mode=tf.estimator.ModeKeys.TRAIN,
                                 use_bias=False,
                                 activation=tf.nn.relu6,
                                 kernel_initializer=tf.initializers.variance_scaling(distribution='uniform'),
                                 bias_initializer=tf.zeros_initializer(),
                                 kernel_regularizer=None,
                                 bias_regularizer=None):
    """Convolutional autoencoder with num_convolutions on len(filters)
        resolution scales. The downsampling of features is done via strided
        convolutions and upsampling via strided transpose convolutions. On each
        resolution scale s are num_convolutions with filter size = filters[s].
        strides[s] determine the downsampling factor at each resolution scale.

    Args:
        inputs (tf.Tensor): Input tensor to the network, required to be of
            rank 5.
        num_convolutions (int, optional): Number of convolutions per resolution
            scale.
        num_hidden_units (int, optional): Number of hidden units.
        filters (tuple or list, optional): Number of filters for all
            convolutions at each resolution scale.
        strides (tuple or list, optional): Stride of the first convolution on a
            resolution scale.
        mode (str, optional): One of the tf.estimator.ModeKeys strings: TRAIN,
            EVAL or PREDICT
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        activation (optional): A function to use as activation function.
        kernel_initializer (TYPE, optional): An initializer for the convolution
            kernel.
        bias_initializer (TYPE, optional): An initializer for the bias vector.
            If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the bias
            vector.

    Returns:
        dict: dictionary of output tensors

    """
    outputs = {}
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'

    conv_op = tf.layers.conv3d
    tp_conv_op = tf.layers.conv3d_transpose

    conv_params = {'padding': 'same',
                   'use_bias': use_bias,
                   'kernel_initializer': kernel_initializer,
                   'bias_initializer': bias_initializer,
                   'kernel_regularizer': kernel_regularizer,
                   'bias_regularizer': bias_regularizer}

    x = inputs
    tf.logging.info('Input tensor shape {}'.format(x.get_shape()))

    # Convolutional feature encoding blocks with num_convolutions at different
    # resolution scales res_scales
    for res_scale in range(0, len(filters)):

        for i in range(0, num_convolutions - 1):
            with tf.variable_scope('enc_unit_{}_{}'.format(res_scale, i)):
                x = conv_op(inputs=x,
                            filters=filters[res_scale],
                            kernel_size=(3, 3, 3),
                            strides=(1, 1, 1),
                            **conv_params)

                x = tf.layers.batch_normalization(
                    inputs=x,
                    training=mode == tf.estimator.ModeKeys.TRAIN)
                x = activation(x)
                tf.logging.info('Encoder at res_scale {} shape: {}'.format(
                    res_scale, x.get_shape()))

        # Employ strided convolutions to downsample
        with tf.variable_scope('enc_unit_{}_{}'.format(
                res_scale,
                num_convolutions)):

            # Adjust the strided conv kernel size to prevent losing information
            k_size = [s * 2 if s > 1 else 3 for s in strides[res_scale]]

            x = conv_op(inputs=x,
                        filters=filters[res_scale],
                        kernel_size=k_size,
                        strides=strides[res_scale],
                        **conv_params)

            x = tf.layers.batch_normalization(
                x, training=mode == tf.estimator.ModeKeys.TRAIN)
            x = activation(x)
            tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
                res_scale, x.get_shape()))

    # Densely connected layer of hidden units
    x_shape = x.get_shape().as_list()
    x = tf.reshape(x, (tf.shape(x)[0], np.prod(x_shape[1:])))

    x = tf.layers.dense(inputs=x,
                        units=num_hidden_units,
                        use_bias=conv_params['use_bias'],
                        kernel_initializer=conv_params['kernel_initializer'],
                        bias_initializer=conv_params['bias_initializer'],
                        kernel_regularizer=conv_params['kernel_regularizer'],
                        bias_regularizer=conv_params['bias_regularizer'],
                        name='hidden_units')

    outputs['hidden_units'] = x
    tf.logging.info('Hidden units tensor shape: {}'.format(x.get_shape()))

    x = tf.layers.dense(inputs=x,
                        units=np.prod(x_shape[1:]),
                        activation=activation,
                        use_bias=conv_params['use_bias'],
                        kernel_initializer=conv_params['kernel_initializer'],
                        bias_initializer=conv_params['bias_initializer'],
                        kernel_regularizer=conv_params['kernel_regularizer'],
                        bias_regularizer=conv_params['bias_regularizer'])

    x = tf.reshape(x, [tf.shape(x)[0]] + list(x_shape)[1:])
    tf.logging.info('Decoder input tensor shape: {}'.format(x.get_shape()))

    # Decoding blocks with num_convolutions at different resolution scales
    # res_scales
    for res_scale in reversed(range(0, len(filters))):

        # Employ strided transpose convolutions to upsample
        with tf.variable_scope('dec_unit_{}_0'.format(res_scale)):

            # Adjust the strided tp conv kernel size to prevent losing
            # information
            k_size = [s * 2 if s > 1 else 3 for s in strides[res_scale]]

            x = tp_conv_op(inputs=x,
                           filters=filters[res_scale],
                           kernel_size=k_size,
                           strides=strides[res_scale],
                           **conv_params)

            x = tf.layers.batch_normalization(
                x, training=mode == tf.estimator.ModeKeys.TRAIN)
            x = activation(x)
            tf.logging.info('Decoder at res_scale {} tensor shape: {}'.format(
                res_scale, x.get_shape()))

        for i in range(1, num_convolutions):
            with tf.variable_scope('dec_unit_{}_{}'.format(res_scale, i)):

                x = conv_op(inputs=x,
                            filters=filters[res_scale],
                            kernel_size=(3, 3, 3),
                            strides=(1, 1, 1),
                            **conv_params)

                x = tf.layers.batch_normalization(
                    x, training=mode == tf.estimator.ModeKeys.TRAIN)
                x = activation(x)
            tf.logging.info('Decoder at res_scale {} tensor shape: {}'.format(
                res_scale, x.get_shape()))

    # A final convolution reduces the number of output features to those of
    # the inputs
    x = conv_op(inputs=x,
                filters=inputs.get_shape().as_list()[-1],
                kernel_size=(1, 1, 1),
                strides=(1, 1, 1),
                **conv_params)

    tf.logging.info('Output tensor shape: {}'.format(x.get_shape()))
    outputs['x_'] = x

    return outputs