
# -*- coding: utf-8 -*-

'''Convolutional layers "scan" over input data.'''

from __future__ import division

import numpy as np
import theano
import theano.tensor as TT

from . import base
from .. import util

__all__ = [
    'Conv1',
    'Conv2',
    'Pool1',
    'Pool2',
]


class Convolution(base.Layer):
    '''Convolution layers convolve filters over the input arrays.

    Parameters
    ----------
    filter_size : (int, int)
        Size of the convolution filters for this layer.
    stride : (int, int), optional
        Apply convolutions with this stride; i.e., skip this many samples
        between convolutions. Defaults to (1, 1)---that is, no skipping.
    border_mode : str, optional
        Compute convolutions with this border mode. Defaults to 'valid'.
    '''

    def __init__(self, filter_size, stride=(1, 1), border_mode='valid', **kwargs):
        self.filter_size = filter_size
        self.stride = stride
        self.border_mode = border_mode
        super(Convolution, self).__init__(**kwargs)

    def log(self):
        inputs = ', '.join('"{0}" {1}'.format(*ns) for ns in self._input_shapes.items())
        util.log('layer {0.__class__.__name__} "{0.name}" '
                 '{0.output_shape} {1} {0.border_mode} '
                 'filters {2}{3} from {4}', self,
                 getattr(self.activate, 'name', self.activate),
                 'x'.join(str(i) for i in self.filter_size),
                 ''.join('+{}'.format(i) for i in self.stride),
                 inputs)
        util.log('learnable parameters: {}', self.log_params())

    def add_conv_weights(self, name, mean=0, std=None, sparsity=0):
        '''Add a convolutional weight array to this layer's parameters.

        Parameters
        ----------
        name : str
            Name of the parameter to add.
        mean : float, optional
            Mean value for randomly-initialized weights. Defaults to 0.
        std : float, optional
            Standard deviation of initial matrix values. Defaults to
            :math:`1 / \sqrt{n_i + n_o}`.
        sparsity : float, optional
            Fraction of weights to set to zero. Defaults to 0.
        '''
        nin = self.input_size
        nout = self.output_size
        mean = self.kwargs.get(
            'mean_{}'.format(name),
            self.kwargs.get('mean', mean))
        std = self.kwargs.get(
            'std_{}'.format(name),
            self.kwargs.get('std', std or 1 / np.sqrt(nin + nout)))
        sparsity = self.kwargs.get(
            'sparsity_{}'.format(name),
            self.kwargs.get('sparsity', sparsity))
        arr = np.zeros((nout, nin) + self.filter_size, util.FLOAT)
        for r in range(self.filter_size[0]):
            for c in range(self.filter_size[1]):
                arr[:, :, r, c] = util.random_matrix(
                    nout, nin, mean, std, sparsity=sparsity, rng=self.rng)
        self._params.append(theano.shared(arr, name=self._fmt(name)))
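

# Hedged, illustrative sketch (not part of the original module): the weight
# array built by ``add_conv_weights`` has the same layout as the
# ``filter_shape`` argument that the layers below pass to ``conv2d``:
# (output channels, input channels) + filter_size. The sizes are made up.
def _example_conv_weight_shape():
    nout, nin, filter_size = 32, 3, (5, 5)     # hypothetical layer sizes
    arr = np.zeros((nout, nin) + filter_size, util.FLOAT)
    return arr.shape                           # -> (32, 3, 5, 5)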


class Conv1(Convolution):
    '''1-dimensional convolutions run over one data axis.

    Notes
    -----
    One-dimensional convolution layers are typically used in ``theanets``
    models that use recurrent inputs and outputs, i.e.,
    :class:`theanets.recurrent.Autoencoder`,
    :class:`theanets.recurrent.Predictor`,
    :class:`theanets.recurrent.Classifier`, or
    :class:`theanets.recurrent.Regressor`. The convolution will be applied
    over the "time" dimension (axis 1).

    Parameters
    ----------
    filter_size : int
        Length of the convolution filters for this layer.
    stride : int, optional
        Apply convolutions with this stride; i.e., skip this many samples
        between convolutions. Defaults to 1, i.e., no skipping.
    border_mode : str, optional
        Compute convolutions with this border mode. Defaults to 'valid'.
    '''

    def __init__(self, filter_size, stride=1, border_mode='valid', **kwargs):
        super(Conv1, self).__init__(
            filter_size=(1, filter_size),
            stride=(1, stride),
            border_mode=border_mode,
            **kwargs)

    def setup(self):
        self.add_conv_weights('w')
        self.add_bias('b', self.output_size)

    def resolve_outputs(self):
        if self.input_shape is None or self.input_shape[0] is None:
            return super(Conv1, self).resolve_outputs()
        image = np.array(self.input_shape[:-1])
        # only the second element of the (1, filter_size) filter spans the
        # time axis, so drop the dummy leading 1 when sizing the output
        kernel = np.array(self.filter_size[1:])
        result = image
        if self.border_mode == 'full':
            result = image + kernel - 1
        if self.border_mode == 'valid':
            result = image - kernel + 1
        self._output_shapes['out'] = tuple(result) + (self.kwargs['size'], )

    def transform(self, inputs):
        # input is: (batch, time, input)
        # conv2d wants: (batch, input, 1, time)
        x = inputs[self.input_name].dimshuffle(0, 2, 'x', 1)
        pre = TT.nnet.conv2d(
            x,
            self.find('w'),
            image_shape=(None, self.input_size, 1, None),
            filter_shape=(self.output_size, self.input_size) + self.filter_size,
            border_mode=self.border_mode,
            subsample=self.stride,
        ).dimshuffle(0, 3, 1, 2)[:, :, :, 0] + self.find('b')
        # conv2d output is: (batch, output, 1, time)
        # we want: (batch, time, output)
        # (have to do [:, :, :, 0] to remove unused trailing dimension)
        return dict(pre=pre, out=self.activate(pre)), []
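

# Hedged, illustrative sketch (not part of the original module): with the
# default 'valid' border mode a 1-d convolution shortens the time axis by
# ``filter_size - 1``, while 'full' mode lengthens it by the same amount.
def _example_conv1_output_length(time_steps=100, filter_length=11):
    valid = time_steps - filter_length + 1     # -> 90 output time steps
    full = time_steps + filter_length - 1      # -> 110 output time steps
    return valid, full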


class Conv2(Convolution):
    '''2-dimensional convolutions run over two data axes.

    Two-dimensional convolution layers are standard image processing
    techniques. In theanets, these layers expect an input consisting of
    (num-examples, width, height, num-channels).

    Parameters
    ----------
    filter_size : (int, int)
        Size of the convolution filters for this layer.
    stride : (int, int), optional
        Apply convolutions with this stride; i.e., skip this many samples
        between convolutions. Defaults to (1, 1), i.e., no skipping.
    border_mode : str, optional
        Compute convolutions with this border mode. Defaults to 'valid'.
    '''

    def setup(self):
        self.add_conv_weights('w')
        self.add_bias('b', self.output_size)

    def resolve_outputs(self):
        shape = self.input_shape
        if shape is None or shape[0] is None or shape[1] is None:
            return super(Conv2, self).resolve_outputs()
        image = np.array(shape[:-1])
        kernel = np.array(self.filter_size)
        result = image
        if self.border_mode == 'full':
            result = image + kernel - 1
        if self.border_mode == 'valid':
            result = image - kernel + 1
        self._output_shapes['out'] = tuple(result) + (self.kwargs['size'], )

    def transform(self, inputs):
        # input is: (batch, width, height, input)
        # conv2d wants: (batch, input, width, height)
        x = inputs[self.input_name].dimshuffle(0, 3, 1, 2)
        pre = TT.nnet.conv2d(
            x,
            self.find('w'),
            image_shape=(None, self.input_size, None, None),
            filter_shape=(self.output_size, self.input_size) + self.filter_size,
            border_mode=self.border_mode,
            subsample=self.stride,
        ).dimshuffle(0, 2, 3, 1) + self.find('b')
        # conv2d output is: (batch, output, width, height)
        # we want: (batch, width, height, output)
        return dict(pre=pre, out=self.activate(pre)), []
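

# Hedged, illustrative sketch (not part of the original module): the output
# sizes computed by ``Conv2.resolve_outputs`` for made-up 28x28 inputs and
# 5x5 filters under the 'valid' and 'full' border modes.
def _example_conv2_output_shape():
    image = np.array((28, 28))                 # hypothetical input width, height
    kernel = np.array((5, 5))                  # hypothetical filter size
    valid = tuple(image - kernel + 1)          # -> (24, 24)
    full = tuple(image + kernel - 1)           # -> (32, 32)
    return valid, full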


class Pooling(base.Layer):
    '''Pooling layers downsample their inputs by aggregating over small windows.'''
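
    # NOTE: a hedged sketch, not shown in the source above. The Pool1 and Pool2
    # transforms below read ``self.pool_size``, ``self.stride``, and
    # ``self.mode``, so the constructor presumably stores them along these
    # lines; the default values here are assumptions.
    def __init__(self, pool_size, stride=None, mode='max', **kwargs):
        self.pool_size = pool_size   # window shape handed to max_pool_2d
        self.stride = stride         # step between windows (the "st" argument)
        self.mode = mode             # pooling mode, e.g. 'max'
        super(Pooling, self).__init__(**kwargs)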


class Pool1(Pooling):
    '''1-dimensional pooling downsamples the "time" axis (axis 1) of its input.'''

    def transform(self, inputs):
        # input is: (batch, time, input)
        # max_pool_2d wants: (batch, input, time, 1)
        x = inputs[self.input_name].dimshuffle(0, 2, 1, 'x')
        pre = TT.signal.downsample.max_pool_2d(
            x, self.pool_size, st=self.stride, mode=self.mode,
        ).dimshuffle(0, 2, 1, 3)[:, :, :, 0]
        # pooling output is: (batch, output, time, 1)
        # we want: (batch, time, output)
        return dict(pre=pre, out=self.activate(pre)), []


class Pool2(Pooling):
    '''2-dimensional pooling downsamples the width and height axes of its input.'''

    def transform(self, inputs):
        # input is: (batch, width, height, input)
        # max_pool_2d wants: (batch, input, width, height)
        x = inputs[self.input_name].dimshuffle(0, 3, 1, 2)
        pre = TT.signal.downsample.max_pool_2d(
            x, self.pool_size, st=self.stride, mode=self.mode,
        ).dimshuffle(0, 2, 3, 1)
        # pooling output is: (batch, output, width, height)
        # we want: (batch, width, height, output)
        return dict(pre=pre, out=self.activate(pre)), []
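

# Hedged, illustrative sketch (not part of the original module): pooling with
# a stride equal to the pool size roughly divides the pooled axis by the pool
# size (border handling aside; the numbers below are made up).
def _example_pool_output_length(time_steps=100, pool=4):
    return time_steps // pool                  # -> 25 pooled steps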