Source code for theanets.graph

# -*- coding: utf-8 -*-

r'''This module contains a base class for modeling computation graphs.'''

import downhill
import gzip
import hashlib
import numpy as np
import pickle
import theano
import time
import warnings

from . import layers
from . import losses
from . import regularizers
from . import trainer
from . import util


class Network(object):
    '''The network class encapsulates a network computation graph.

    Notes
    -----

    Computation graphs are organized into :ref:`layers <layers>`. Each layer
    receives one or more arrays of input data, transforms them, and generates
    one or more arrays of output data. Outputs in a computation graph are
    named according to their layer and output type, so the 'pre' output of a
    layer named 'hid1' would be named 'hid1:pre'. The 'out' output is the
    default output for a layer. By default the last layer in a network is
    named 'out'.

    The parameters in a network graph are optimized by minimizing a
    :ref:`loss function <losses>` with respect to some set of training data.
    Typically the value produced by 'out:out' is compared to some target
    value, creating an error value of some sort. This error value is then
    propagated back through the computation graph to update the parameters in
    the model.

    Parameters
    ----------
    layers : int, tuple, dict, or :class:`Layer <theanets.layers.base.Layer>`
        A sequence of values specifying the layer configuration for the
        network. For more information, please see
        :ref:`guide-creating-specifying-layers`.
    loss : str or :class:`Loss <theanets.losses.Loss>`
        The name of a loss function to optimize when training this network
        model.
    weighted : bool, optional
        If True, optimize this model using a "weighted" loss. Weighted losses
        typically require an additional array as input during optimization.
        For more information, see :ref:`losses-weighted`. Defaults to False.
    rng : int or RandomState, optional
        A seed or numpy ``RandomState`` instance for generating randomness in
        the model. Defaults to 13.

    Attributes
    ----------
    layers : list of :class:`Layer <theanets.layers.base.Layer>`
        A list of the layers in this network model.
    losses : list of :class:`Loss <theanets.losses.Loss>`
        A list of losses to be computed when optimizing this network model.
    '''

    DEFAULT_OUTPUT_ACTIVATION = 'linear'
    '''Default activation for the output layer.'''

    INPUT_NDIM = 2
    '''Number of dimensions for holding input data arrays.'''

    OUTPUT_NDIM = 2
    '''Number of dimensions for holding output data arrays.'''

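    # Illustrative sketch (not part of the original source): how graph
    # outputs are named, per the naming convention described in the class
    # docstring. The layer name 'hid1' follows the default naming scheme
    # used by the constructor, and ``x`` is a hypothetical input array:
    #
    #     net = Network(layers=(4, 8, 2))
    #     outputs = net.feed_forward(x)
    #     outputs['hid1:out']   # default output of the layer named 'hid1'
    #     outputs['hid1:pre']   # 'pre'-activation output, if the layer has one
    #     outputs['out:out']    # default output of the last layer
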
    def __init__(self, layers=(), loss='mse', weighted=False, rng=13):
        self._graphs = {}     # cache of symbolic computation graphs
        self._functions = {}  # cache of callable feedforward functions
        self._rng = rng

        # create layers based on specs provided in the constructor.
        self.layers = []
        for i, layer in enumerate(layers):
            first = i == 0
            last = i == len(layers) - 1
            name = 'in' if first else 'out' if last else 'hid{}'.format(i)
            activation = self.DEFAULT_OUTPUT_ACTIVATION if last else 'relu'
            self.add_layer(layer=layer, name=name, activation=activation)

        # bind layers to this graph after construction. this finalizes layer
        # shapes and does other consistency checks based on the entire graph.
        for l in self.layers:
            l.bind(self)

        # create a default loss (usually).
        self.losses = []
        if loss and self.layers:
            self.set_loss(loss, weighted=weighted,
                          target=self.OUTPUT_NDIM,
                          output_name=self.layers[-1].output_name)

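    # Illustrative sketch (not part of the original source): the constructor
    # names layers 'in', 'hid1', 'hid2', ..., 'out', applying 'relu' to
    # hidden layers and DEFAULT_OUTPUT_ACTIVATION ('linear') to the last:
    #
    #     net = Network(layers=(10, 20, 20, 3))
    #     [l.name for l in net.layers]   # -> ['in', 'hid1', 'hid2', 'out']
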
    def add_layer(self, layer=None, **kwargs):
        '''Add a :ref:`layer <layers>` to our network graph.

        Parameters
        ----------
        layer : int, tuple, dict, or :class:`Layer <theanets.layers.base.Layer>`
            A value specifying the layer to add. For more information, please
            see :ref:`guide-creating-specifying-layers`.
        '''
        # if the given layer is a Layer instance, just add it and move on.
        if isinstance(layer, layers.Layer):
            self.layers.append(layer)
            return

        form = kwargs.pop('form', 'ff' if self.layers else 'input').lower()

        if isinstance(layer, util.basestring):
            if not layers.Layer.is_registered(layer):
                raise util.ConfigurationError(
                    'unknown layer type: {}'.format(layer))
            form = layer
            layer = None

        # if layer is a tuple/list of integers, assume it's a shape.
        if isinstance(layer, (tuple, list)) and \
                all(isinstance(x, int) for x in layer):
            kwargs['shape'] = tuple(layer)
            layer = None

        # if layer is some other tuple/list, assume it's a list of:
        # - the name of a layers.Layer class (str)
        # - the name of an activation function (str)
        # - the number of units in the layer (int)
        if isinstance(layer, (tuple, list)):
            for el in layer:
                if isinstance(el, util.basestring) and \
                        layers.Layer.is_registered(el):
                    form = el
                elif isinstance(el, util.basestring):
                    kwargs['activation'] = el
                elif isinstance(el, int):
                    if 'size' in kwargs:
                        raise util.ConfigurationError(
                            'duplicate layer sizes! {}'.format(kwargs))
                    kwargs['size'] = el
            layer = None

        # if layer is a dictionary, try to extract a form for the layer, and
        # override our default keyword arguments with the rest.
        if isinstance(layer, dict):
            for key, value in layer.items():
                if key == 'form':
                    form = value.lower()
                else:
                    kwargs[key] = value
            layer = None

        # if neither shape nor size have been specified yet, check that the
        # "layer" param is an int and use it for "size".
        if 'shape' not in kwargs and 'size' not in kwargs and \
                isinstance(layer, int):
            kwargs['size'] = layer

        # if it hasn't been provided in some other way yet, set input
        # dimensionality based on the model.
        if form == 'input' and 'shape' not in kwargs:
            kwargs.setdefault('ndim', self.INPUT_NDIM)

        # set some default layer parameters.
        if form != 'input':
            kwargs.setdefault('inputs', self.layers[-1].output_name)
        kwargs.setdefault('rng', self._rng)

        if form.lower() == 'tied' and 'partner' not in kwargs:
            # we look backward through our list of layers for a partner.
            # any "tied" layer that we find increases a counter by one,
            # and any "untied" layer decreases the counter by one. our
            # partner is the first layer we find with count zero.
            #
            # this is intended to handle the hopefully common case of a
            # (possibly deep) tied-weights autoencoder.
            tied = 1
            partner = None
            for l in self.layers[::-1]:
                tied += 1 if isinstance(l, layers.Tied) else -1
                if tied == 0:
                    partner = l.name
                    break
            else:
                raise util.ConfigurationError(
                    'cannot find partner for "{}"'.format(kwargs))
            kwargs['partner'] = partner

        layer = layers.Layer.build(form, **kwargs)

        # check that graph inputs have unique names.
        if isinstance(layer, layers.Input):
            if any(layer.name == i.name for i in self.inputs):
                raise util.ConfigurationError(
                    '"{}": duplicate input name!'.format(layer.name))

        self.layers.append(layer)

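    # Illustrative sketch (not part of the original source): the
    # specification forms that the parsing logic above accepts, shown as
    # constructor arguments (the constructor forwards each spec to
    # add_layer). The activation names are examples only:
    #
    #     net = Network(layers=(
    #         4,                         # int -> size
    #         (8, 'tanh'),               # size plus activation name
    #         ('ff', 'relu', 8),         # registered form + activation + size
    #         dict(form='ff', size=2),   # dict with a 'form' key
    #     ))
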
    def add_loss(self, loss=None, **kwargs):
        '''Add a :ref:`loss function <losses>` to the model.

        Parameters
        ----------
        loss : str, dict, or :class:`theanets.losses.Loss`
            A loss function to add. If this is a Loss instance, it will be
            added immediately. If this is a string, it names a loss function
            to build and add. If it is a dictionary, it should contain a
            ``'form'`` key whose string value names the loss function to add.
            Other arguments will be passed to
            :func:`theanets.losses.Loss.build`.
        '''
        if isinstance(loss, losses.Loss):
            self.losses.append(loss)
            return
        form = loss or 'mse'
        if 'form' in kwargs:
            form = kwargs.pop('form').lower()
        kw = dict(target=self.OUTPUT_NDIM,
                  output_name=self.layers[-1].output_name)
        kw.update(kwargs)
        if isinstance(loss, dict):
            loss = dict(loss)
            if 'form' in loss:
                form = loss.pop('form').lower()
            kw.update(loss)
        self.losses.append(losses.Loss.build(form, **kw))

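    # Illustrative sketch (not part of the original source): the three ways
    # of specifying a loss, per the dispatch above. The loss names ('xe',
    # 'mae') and the 'weight' keyword are assumed to be understood by
    # losses.Loss.build:
    #
    #     net.add_loss('xe')                          # by name
    #     net.add_loss(dict(form='mse', weight=0.5))  # dict with 'form' key
    #     net.add_loss(losses.Loss.build(
    #         'mae', target=2, output_name='out:out'))  # prebuilt instance
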
    def set_loss(self, *args, **kwargs):
        '''Clear the current loss functions from the network and add a new one.

        All parameters and keyword arguments are passed to :func:`add_loss`
        after clearing the current losses.
        '''
        self.losses = []
        self.add_loss(*args, **kwargs)

    def itertrain(self, train, valid=None, algo='rmsprop', subalgo='rmsprop',
                  save_every=0, save_progress=None, **kwargs):
        '''Train our network, one batch at a time.

        This method yields a series of ``(train, valid)`` monitor pairs. The
        ``train`` value is a dictionary mapping names to monitor values
        evaluated on the training dataset. The ``valid`` value is also a
        dictionary mapping names to values, but these values are evaluated on
        the validation dataset.

        Because validation might not occur every training iteration, the
        validation monitors might be repeated for multiple training
        iterations. It is probably most helpful to think of the validation
        monitors as being the "most recent" values that have been computed.

        After training completes, the parameters in this network will have
        been updated to their trained values.

        Parameters
        ----------
        train : :class:`Dataset <downhill.dataset.Dataset>` or list
            A dataset to use when training the network. If this is a
            ``downhill.Dataset`` instance, it will be used directly as the
            training dataset. If it is a list of numpy arrays or a list of
            callables, it will be converted to a ``downhill.Dataset`` and
            then used as the training set.
        valid : :class:`Dataset <downhill.dataset.Dataset>` or list, optional
            If this is provided, it will be used as a validation dataset. If
            not provided, the training set will be used for validation.
            (This is not recommended!)
        algo : str, optional
            An optimization algorithm to use for training our network. If
            not provided, :class:`RMSProp <downhill.adaptive.RMSProp>` will
            be used.
        subalgo : str, optional
            An optimization algorithm to use for a trainer that requires a
            "sub-algorithm," such as an unsupervised pretrainer. Defaults to
            :class:`RMSProp <downhill.adaptive.RMSProp>`.
        save_every : int or float, optional
            If this is nonzero and ``save_progress`` is not None, then the
            model being trained will be saved periodically. If this is a
            float, it is treated as a number of minutes to wait between
            saves. If it is an int, it is treated as the number of training
            epochs to wait between saves. Defaults to 0.
        save_progress : str or file handle, optional
            If this is not None, and ``save_every`` is nonzero, then save the
            model periodically during training. This parameter gives either
            (a) the full path of a file to save the model, or (b) a file-like
            object where the model should be saved. If it is a string and the
            given name contains a "{}" format specifier, it will be filled
            with the integer Unix timestamp at the time the model is saved.
            Defaults to None, which does not save models.

        Yields
        ------
        training : dict
            A dictionary of monitor values computed using the training
            dataset, at each iteration of training. This dictionary will at
            least contain a 'loss' key that indicates the value of the loss
            function. Other keys may be available depending on the trainer
            being used.
        validation : dict
            A dictionary of monitor values computed using the validation
            dataset, at each iteration of training.
        '''
        if 'rng' not in kwargs:
            kwargs['rng'] = self._rng

        def create_dataset(data, **kwargs):
            name = kwargs.get('name', 'dataset')
            s = '{}_batches'.format(name)
            return downhill.Dataset(
                data,
                name=name,
                batch_size=kwargs.get('batch_size', 32),
                iteration_size=kwargs.get('iteration_size', kwargs.get(s)),
                axis=kwargs.get('axis', 0),
                rng=kwargs['rng'])

        # set up datasets ...
        if valid is None:
            valid = train
        if not isinstance(valid, downhill.Dataset):
            valid = create_dataset(valid, name='valid', **kwargs)
        if not isinstance(train, downhill.Dataset):
            train = create_dataset(train, name='train', **kwargs)

        if 'algorithm' in kwargs:
            warnings.warn(
                'please use the "algo" keyword arg instead of "algorithm"',
                DeprecationWarning)
            algo = kwargs.pop('algorithm')
            if isinstance(algo, (list, tuple)):
                algo = algo[0]

        # set up trainer ...
        if isinstance(algo, util.basestring):
            algo = algo.lower()
            if algo == 'sample':
                algo = trainer.SampleTrainer(self)
            elif algo.startswith('layer') or algo.startswith('sup'):
                algo = trainer.SupervisedPretrainer(subalgo, self)
            elif algo.startswith('pre') or algo.startswith('unsup'):
                algo = trainer.UnsupervisedPretrainer(subalgo, self)
            else:
                algo = trainer.DownhillTrainer(algo, self)

        # set up check to save model ...
        def needs_saving(elapsed, iteration):
            if save_progress is None:
                return False
            if isinstance(save_every, float):
                return elapsed > 60 * save_every
            if isinstance(save_every, int):
                return iteration % save_every == 0
            return False

        # train it!
        start = time.time()
        for i, monitors in enumerate(algo.itertrain(train, valid, **kwargs)):
            yield monitors
            now = time.time()
            if i and needs_saving(now - start, i):
                filename_or_handle = save_progress
                if isinstance(filename_or_handle, util.basestring):
                    filename_or_handle = save_progress.format(int(now))
                self.save(filename_or_handle)
                start = now

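    # Illustrative sketch (not part of the original source): consuming the
    # per-iteration monitor pairs. ``x`` and ``y`` are hypothetical numpy
    # arrays shaped (num-examples, num-inputs) and (num-examples,
    # num-outputs); 'patience' is assumed to be a keyword understood by the
    # downhill trainer:
    #
    #     for tm, vm in net.itertrain([x, y], algo='rmsprop', patience=10):
    #         print('train loss:', tm['loss'], 'valid loss:', vm['loss'])
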
    def train(self, *args, **kwargs):
        '''Train the network until the trainer converges.

        All arguments are passed to :func:`itertrain`.

        Returns
        -------
        training : dict
            A dictionary of monitor values computed using the training
            dataset, at the conclusion of training. This dictionary will at
            least contain a 'loss' key that indicates the value of the loss
            function. Other keys may be available depending on the trainer
            being used.
        validation : dict
            A dictionary of monitor values computed using the validation
            dataset, at the conclusion of training.
        '''
        monitors = None
        for monitors in self.itertrain(*args, **kwargs):
            pass
        return monitors

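    # Illustrative sketch (not part of the original source): training to
    # convergence while saving a snapshot every 10 epochs; the "{}" in the
    # hypothetical filename is filled with a Unix timestamp, as described in
    # itertrain:
    #
    #     tm, vm = net.train([x, y], algo='nag',
    #                        save_progress='model-{}.pkl.gz', save_every=10)
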
    def _hash(self, regularizers=()):
        '''Construct a string key for representing a computation graph.

        This key will be unique for a given (a) network topology, (b) set of
        losses, and (c) set of regularizers.

        Returns
        -------
        key : str
            A hash representing the computation graph for the current
            network.
        '''
        def add(s):
            h.update(str(s).encode('utf-8'))
        h = hashlib.md5()
        for l in self.layers:
            add('{}{}{}'.format(l.__class__.__name__, l.name, l.output_shape))
        for l in self.losses:
            add('{}{}'.format(l.__class__.__name__, l.weight))
        for r in regularizers:
            add('{}{}{}'.format(r.__class__.__name__, r.weight, r.pattern))
        return h.hexdigest()

    def build_graph(self, regularizers=()):
        '''Connect the layers in this network to form a computation graph.

        Parameters
        ----------
        regularizers : list of :class:`theanets.regularizers.Regularizer`
            A list of the regularizers to apply while building the
            computation graph.

        Returns
        -------
        outputs : dict of Theano expressions
            A dictionary mapping output names to expressions giving the
            output of each layer in the graph.
        updates : list of update tuples
            A list of updates that should be performed by a Theano function
            that computes something using this graph.
        '''
        key = self._hash(regularizers)
        if key not in self._graphs:
            util.log('building computation graph')
            for loss in self.losses:
                loss.log()
            for reg in regularizers:
                reg.log()
            outputs = {}
            updates = []
            for layer in self.layers:
                out, upd = layer.connect(outputs)
                for reg in regularizers:
                    reg.modify_graph(out)
                outputs.update(out)
                updates.extend(upd)
            self._graphs[key] = outputs, updates
        return self._graphs[key]

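    # Illustrative sketch (not part of the original source): the outputs
    # dictionary is keyed by 'layer:output' names, and the updates can be
    # handed to a Theano function directly:
    #
    #     outputs, updates = net.build_graph()
    #     f = theano.function(net.inputs, outputs['out:out'], updates=updates)
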
    @property
    def inputs(self):
        '''A list of Theano variables for feedforward computations.'''
        return [l.input for l in self.layers if isinstance(l, layers.Input)]

    @property
    def variables(self):
        '''A list of Theano variables for loss computations.'''
        result = self.inputs
        seen = set(i.name for i in result)
        for loss in self.losses:
            for v in loss.variables:
                if v.name not in seen:
                    result.append(v)
                    seen.add(v.name)
        return result

    @property
    def params(self):
        '''A list of the learnable Theano parameters for this network.'''
        return [p for l in self.layers for p in l.params]

    def find(self, which, param):
        '''Get a parameter from a layer in the network.

        Parameters
        ----------
        which : int or str
            The layer that owns the parameter to return. If this is an
            integer, then 0 refers to the input layer, 1 refers to the first
            hidden layer, 2 to the second, and so on. If this is a string,
            the layer with the corresponding name, if any, will be used.
        param : int or str
            Name of the parameter to retrieve from the specified layer, or
            its index in the parameter list of the layer.

        Raises
        ------
        KeyError
            If there is no such layer, or if there is no such parameter in
            the specified layer.

        Returns
        -------
        param : Theano shared variable
            A shared parameter variable from the indicated layer.
        '''
        for i, layer in enumerate(self.layers):
            if which == i or which == layer.name:
                return layer.find(param)
        raise KeyError(which)

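    # Illustrative sketch (not part of the original source): both calls
    # retrieve the same parameter from the first hidden layer, assuming the
    # layer holds a parameter named 'w' (the conventional theanets name for
    # a weight matrix):
    #
    #     net.find(1, 'w')        # by layer index
    #     net.find('hid1', 'w')   # by layer name
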
    def feed_forward(self, x, **kwargs):
        '''Compute a forward pass of all layers from the given input.

        All keyword arguments are passed directly to :func:`build_graph`.

        Parameters
        ----------
        x : ndarray (num-examples, num-variables)
            An array containing data to be fed into the network. Multiple
            examples are arranged as rows in this array, with columns
            containing the variables for each example.

        Returns
        -------
        outputs : dict of ndarray (num-examples, num-units)
            A dictionary mapping output names to the activation values of
            each layer in the network when given input `x`. For each of the
            hidden layers, an array is returned containing one row per input
            example; the columns of each array correspond to units in the
            respective layer. The "output" of the network is stored under
            the output name of the last layer.
        '''
        regs = regularizers.from_kwargs(self, **kwargs)
        key = self._hash(regs)
        if key not in self._functions:
            outputs, updates = self.build_graph(regs)
            labels, exprs = list(outputs.keys()), list(outputs.values())
            util.log('compiling feed_forward function')
            self._functions[key] = (labels, theano.function(
                self.inputs, exprs, updates=updates))
        labels, f = self._functions[key]
        return dict(zip(labels, f(x)))

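    # Illustrative sketch (not part of the original source): pulling hidden
    # activations out of the returned dictionary, with ``x`` a hypothetical
    # input array:
    #
    #     acts = net.feed_forward(x)
    #     hidden = acts['hid1:out']                  # (num-examples, num-units)
    #     output = acts[net.layers[-1].output_name]  # network output
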
    def predict(self, x, **kwargs):
        '''Compute a forward pass of the inputs, returning the network output.

        All keyword arguments end up being passed to :func:`build_graph`.

        Parameters
        ----------
        x : ndarray (num-examples, num-variables)
            An array containing data to be fed into the network. Multiple
            examples are arranged as rows in this array, with columns
            containing the variables for each example.

        Returns
        -------
        y : ndarray (num-examples, num-variables)
            Returns the values of the network output units when given input
            `x`. Rows in this array correspond to examples, and columns to
            output variables.
        '''
        return self.feed_forward(x, **kwargs)[self.layers[-1].output_name]

    def score(self, x, y, w=None, **kwargs):
        '''Compute R^2 coefficient of determination for a given labeled input.

        Parameters
        ----------
        x : ndarray (num-examples, num-inputs)
            An array containing data to be fed into the network. Multiple
            examples are arranged as rows in this array, with columns
            containing the variables for each example.
        y : ndarray (num-examples, num-outputs)
            An array containing expected target data for the network.
            Multiple examples are arranged as rows in this array, with
            columns containing the variables for each example.
        w : ndarray (num-examples, num-outputs), optional
            An array of weights to apply to the squared error terms. If not
            provided, all terms are weighted uniformly.

        Returns
        -------
        r2 : float
            The R^2 correlation between the prediction of this network and
            its target output.
        '''
        u = y - self.predict(x, **kwargs)
        v = y - y.mean()
        if w is None:
            w = np.ones_like(u)
        return 1 - (w * u * u).sum() / (w * v * v).sum()

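    # The computation above is the weighted coefficient of determination,
    # with y_hat = net.predict(x) and y_bar = y.mean():
    #
    #     R^2 = 1 - sum(w * (y - y_hat)**2) / sum(w * (y - y_bar)**2)
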
    def __getstate__(self):
        return (self.layers, self.losses)

    def __setstate__(self, state):
        self.layers, self.losses = state
        self._graphs = {}
        self._functions = {}
        self._rng = 13  # the rng seed is not pickled; restore the default.

    def save(self, filename_or_handle):
        '''Save the state of this network to a pickle file on disk.

        Parameters
        ----------
        filename_or_handle : str or file handle
            Save the state of this network to a pickle file. If this
            parameter is a string, it names the file where the pickle will be
            saved. If it is a file-like object, this object will be used for
            writing the pickle. If the filename ends in ".gz" then the output
            will automatically be gzipped.
        '''
        if isinstance(filename_or_handle, util.basestring):
            opener = gzip.open if filename_or_handle.lower().endswith('.gz') \
                else open
            handle = opener(filename_or_handle, 'wb')
        else:
            handle = filename_or_handle
        pickle.dump(self, handle, -1)
        if isinstance(filename_or_handle, util.basestring):
            handle.close()
            util.log('saved model to {}', filename_or_handle)

    @classmethod
    def load(cls, filename_or_handle):
        '''Load a saved network from disk.

        Parameters
        ----------
        filename_or_handle : str or file handle
            Load the state of a network from a pickle file. If this parameter
            is a string, it names the file where the pickle is stored. If it
            is a file-like object, this object will be used for reading the
            pickle. If the filename ends in ".gz" then the input will
            automatically be gunzipped.

        Returns
        -------
        network : :class:`Network`
            A network instance with the topology and parameters stored in
            the pickle.
        '''
        assert not isinstance(cls, Network), \
            'cannot load an instance! say instead: net = Network.load(source)'
        if isinstance(filename_or_handle, util.basestring):
            opener = gzip.open if filename_or_handle.lower().endswith('.gz') \
                else open
            handle = opener(filename_or_handle, 'rb')
        else:
            handle = filename_or_handle
        model = pickle.load(handle)
        if isinstance(filename_or_handle, util.basestring):
            handle.close()
            util.log('loaded model from {}', filename_or_handle)
        return model

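    # Illustrative sketch (not part of the original source): a save/load
    # round trip through a gzipped pickle. The filename is hypothetical:
    #
    #     net.save('model.pkl.gz')            # gzipped because of '.gz'
    #     net = Network.load('model.pkl.gz')
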
    def loss(self, **kwargs):
        '''Return a variable representing the regularized loss for this
        network.

        The regularized loss includes both the :ref:`loss computation
        <losses>` for the network as well as any :ref:`regularizers
        <regularizers>` that are in place.

        Keyword arguments are passed directly to
        :func:`theanets.regularizers.from_kwargs`.

        Returns
        -------
        loss : Theano expression
            A Theano expression representing the loss of this network.
        '''
        regs = regularizers.from_kwargs(self, **kwargs)
        outputs, _ = self.build_graph(regs)
        return sum(l.weight * l(outputs) for l in self.losses) + \
            sum(r.weight * r.loss(self.layers, outputs) for r in regs)

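    # Illustrative sketch (not part of the original source): building a
    # regularized loss expression. The 'hidden_l1' keyword is assumed here
    # to name a regularizer understood by regularizers.from_kwargs:
    #
    #     expr = net.loss(hidden_l1=0.01)   # loss plus a sparsity penalty
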
    def monitors(self, **kwargs):
        '''Return expressions that should be computed to monitor training.

        Returns
        -------
        monitors : list of (name, expression) pairs
            A list of named monitor expressions to compute for this network.
        '''
        regs = regularizers.from_kwargs(self, **kwargs)
        outputs, _ = self.build_graph(regs)
        monitors = [('err', self.losses[0](outputs))]

        def matching(pattern):
            '''Yield all matching outputs or parameters from the graph.'''
            for name, expr in util.outputs_matching(outputs, pattern):
                yield name, expr
            for name, expr in util.params_matching(self.layers, pattern):
                yield name, expr

        def parse_levels(levels):
            '''Yield named monitor callables.'''
            if isinstance(levels, dict):
                levels = levels.items()
            if isinstance(levels, (int, float)):
                levels = [levels]
            for level in levels:
                if isinstance(level, (tuple, list)):
                    label, call = level
                    yield ':{}'.format(label), call
                if isinstance(level, (int, float)):
                    def call(expr):
                        return (expr < level).mean()
                    yield '<{}'.format(level), call

        inputs = kwargs.get('monitors', {})
        if isinstance(inputs, dict):
            inputs = inputs.items()
        for pattern, levels in inputs:
            for name, expr in matching(pattern):
                for key, value in parse_levels(levels):
                    monitors.append(('{}{}'.format(name, key), value(expr)))
        return monitors

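    # Illustrative sketch (not part of the original source): the 'monitors'
    # keyword maps output-name patterns to levels, per parse_levels above.
    # A numeric level monitors the fraction of values below that level, and
    # a (label, callable) pair computes a custom statistic. Wildcard
    # patterns are assumed to be supported by util.outputs_matching:
    #
    #     net.monitors(monitors={
    #         'hid1:out': (0.1, 0.9),       # -> 'hid1:out<0.1', 'hid1:out<0.9'
    #         'hid*:out': [('mean', lambda e: e.mean())],  # -> 'hid1:out:mean'
    #     })
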
    def updates(self, **kwargs):
        '''Return expressions to run as updates during network training.

        Returns
        -------
        updates : list of (parameter, expression) pairs
            A list of named parameter update expressions for this network.
        '''
        regs = regularizers.from_kwargs(self, **kwargs)
        _, updates = self.build_graph(regs)
        return updates