
# -*- coding: utf-8 -*-

'''This module contains common feedforward network models.'''

import numpy as np
import theano
import warnings

from . import graph
from . import layers
from . import regularizers
from . import util


class Autoencoder(graph.Network):
    r'''An autoencoder network attempts to reproduce its input.

    Examples
    --------

    To create an autoencoder, just create a new model instance. Often you'll
    provide the layer configuration at this time:

    >>> model = theanets.Autoencoder([10, 20, 10])

    If you want to create an autoencoder with tied weights, specify that
    layer type when creating the model:

    >>> model = theanets.Autoencoder([10, 20, (10, 'tied')])

    See :ref:`guide-creating` for more information.

    *Data*

    Training data for an autoencoder takes the form of a two-dimensional
    array. The shape of this array is (num-examples, num-variables): the
    first axis enumerates data points in a batch, and the second enumerates
    the variables in the model.

    For instance, to create a training dataset containing 1000 examples:

    >>> inputs = np.random.randn(1000, 10).astype('f')

    *Training*

    Training the model can be as simple as calling the :func:`train()
    <theanets.graph.Network.train>` method:

    >>> model.train([inputs])

    See :ref:`guide-training` for more information about training.

    *Use*

    A model can be used to :func:`predict()
    <theanets.graph.Network.predict>` the output of some input data points:

    >>> test = np.random.randn(3, 10).astype('f')
    >>> print(model.predict(test))

    Additionally, autoencoders can :func:`encode()
    <theanets.feedforward.Autoencoder.encode>` a set of input data points:

    >>> enc = model.encode(test)
    >>> enc.shape
    (3, 20)

    The model can also :func:`decode()
    <theanets.feedforward.Autoencoder.decode>` a set of encoded data:

    >>> model.decode(enc)

    See :ref:`guide-using` for more information about using models.

    Notes
    -----

    Autoencoder models default to a :class:`MSE
    <theanets.losses.MeanSquaredError>` loss. To use a different loss,
    provide a non-default argument for the ``loss`` keyword argument when
    constructing your model.

    Formally, an autoencoder defines a parametric mapping from a data space
    to the same space:

    .. math::
       F_\theta: \mathcal{S} \to \mathcal{S}

    Often, this mapping can be decomposed into an "encoding" stage
    :math:`f_\alpha(\cdot)` and a corresponding "decoding" stage
    :math:`g_\beta(\cdot)` to and from some latent space
    :math:`\mathcal{Z} = \mathbb{R}^{n_z}`:

    .. math::
       \begin{eqnarray*}
       f_\alpha &:& \mathcal{S} \to \mathcal{Z} \\
       g_\beta &:& \mathcal{Z} \to \mathcal{S}
       \end{eqnarray*}

    Autoencoders form an interesting class of models for several reasons.
    They:

    - require only "unlabeled" data (which is typically easy to obtain),
    - are generalizations of many popular density estimation techniques, and
    - can be used to model the "manifold" or density of a dataset.

    Many common dimensionality reduction techniques can be expressed as
    autoencoders. For instance, Principal Component Analysis (PCA) can be
    expressed as a model with two tied, linear layers:

    >>> pca = theanets.Autoencoder([10, (5, 'linear'), (10, 'tied')])

    Similarly, Independent Component Analysis (ICA) can be expressed as the
    same model, but trained with a sparsity penalty on the hidden-layer
    activations:

    >>> ica = pca
    >>> ica.train([inputs], hidden_l1=0.1)

    In this light, "nonlinear PCA" is quite easy to formulate as well!
    '''

    def __init__(self, layers, loss='mse', weighted=False, rng=13):
        super(Autoencoder, self).__init__(layers, rng=rng)
        self.set_loss(form=loss, target=self.inputs[0], weighted=weighted)
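
    # A minimal sketch of non-default construction. The 'mae' loss name is an
    # assumption about theanets' loss registry, and the trailing weight array
    # follows the weighted-loss data convention (weights match the target
    # shape, which for an autoencoder is the input shape):
    #
    # >>> model = theanets.Autoencoder([10, 20, 10], loss='mae')
    # >>> weighted = theanets.Autoencoder([10, 20, 10], weighted=True)
    # >>> inputs = np.random.randn(100, 10).astype('f')
    # >>> weighted.train([inputs, np.ones_like(inputs)])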

    def encode(self, x, layer=None, sample=False, **kwargs):
        '''Encode a dataset using the hidden layer activations of our network.

        Parameters
        ----------
        x : ndarray
            A dataset to encode. Rows of this dataset capture individual data
            points, while columns represent the variables in each data point.

        layer : str, optional
            The name of the hidden layer output to use. By default, we use
            the "middle" hidden layer---for example, for a 4,2,4 or 4,3,2,3,4
            autoencoder, we use the layer with size 2.

        sample : bool, optional
            If True, then draw a sample using the hidden activations as
            independent Bernoulli probabilities for the encoded data. This
            assumes the hidden layer has a logistic sigmoid activation
            function.

        Returns
        -------
        ndarray :
            The given dataset, encoded by the appropriate hidden layer
            activation.
        '''
        enc = self.feed_forward(x, **kwargs)[self._find_output(layer)]
        if sample:
            return np.random.binomial(n=1, p=enc).astype(np.uint8)
        return enc
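
    # A sketch of Bernoulli sampling from the bottleneck codes. The 'sigmoid'
    # activation spec is an assumption, chosen because sample=True requires
    # logistic hidden activations:
    #
    # >>> model = theanets.Autoencoder([10, (5, 'sigmoid'), (10, 'tied')])
    # >>> model.train([np.random.randn(100, 10).astype('f')])
    # >>> bits = model.encode(np.random.randn(3, 10).astype('f'), sample=True)
    # >>> bits.dtype, bits.shape
    # (dtype('uint8'), (3, 5))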

    def decode(self, z, layer=None, **kwargs):
        '''Decode an encoded dataset by computing the output layer activation.

        Parameters
        ----------
        z : ndarray
            A matrix containing encoded data from this autoencoder.

        layer : int or str or :class:`Layer <layers.Layer>`, optional
            The index or name of the hidden layer that was used to encode `z`.

        Returns
        -------
        decoded : ndarray
            The decoded dataset.
        '''
        key = self._find_output(layer)
        if key not in self._functions:
            regs = regularizers.from_kwargs(self, **kwargs)
            outputs, updates = self.build_graph(regs)
            self._functions[key] = theano.function(
                [outputs[key]],
                [outputs[self.layers[-1].output_name]],
                updates=updates)
        return self._functions[key](z)[0]
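
    # Sketch of an encode/decode round trip. Passing the same `layer`
    # specifier to both calls keeps them consistent; 'hid1' is assumed to be
    # theanets' default auto-generated name for the first hidden layer:
    #
    # >>> enc = model.encode(test, layer='hid1')
    # >>> rec = model.decode(enc, layer='hid1')
    # >>> rec.shape == test.shape
    # True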

    def _find_output(self, layer):
        '''Find a layer output name for the given layer specifier.

        Parameters
        ----------
        layer : None, int, str, or :class:`theanets.layers.Layer`
            A layer specification. If this is None, the "middle" layer in the
            network will be used (i.e., the layer at the middle index in the
            list of network layers). If this is an integer, the corresponding
            layer in the network's layer list will be used. If this is a
            string, the layer with the corresponding name will be returned.

        Returns
        -------
        name : str
            The fully-scoped output name for the desired layer.
        '''
        if layer is None:
            layer = len(self.layers) // 2
        if isinstance(layer, int):
            layer = self.layers[layer]
        if isinstance(layer, util.basestring):
            try:
                layer = [l for l in self.layers if l.name == layer][0]
            except IndexError:
                pass
        if isinstance(layer, layers.Layer):
            layer = layer.output_name
        return layer
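
    # Sketch of how the specifiers resolve, assuming theanets' default layer
    # names ('hid1', 'hid2', ...) and 'name:out' output scoping:
    #
    # >>> model = theanets.Autoencoder([4, 3, 2, 3, 4])
    # >>> model._find_output(None)   # middle of 5 layers -> index 2
    # 'hid2:out'
    # >>> model._find_output(2) == model._find_output('hid2')
    # True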

    def score(self, x, w=None, **kwargs):
        '''Compute R^2 coefficient of determination for a given input.

        Parameters
        ----------
        x : ndarray (num-examples, num-inputs)
            An array containing data to be fed into the network. Multiple
            examples are arranged as rows in this array, with columns
            containing the variables for each example.

        w : ndarray (num-examples, num-inputs), optional
            An array of weights to apply to the reconstruction error.

        Returns
        -------
        r2 : float
            The R^2 correlation between the prediction of this network and
            its input. This can serve as one measure of the information loss
            of the autoencoder.
        '''
        return super(Autoencoder, self).score(x, x, w=w, **kwargs)
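
    # Sketch: scoring an autoencoder needs only the inputs, since the model
    # serves as its own target; a perfect reconstruction would score 1.0:
    #
    # >>> model.score(inputs)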


class Regressor(graph.Network):
    '''A regressor attempts to produce a target output given some inputs.

    Examples
    --------

    To create a regression model, just create a new class instance. Often
    you'll provide the layer configuration at this time:

    >>> model = theanets.Regressor([10, 20, 3])

    See :ref:`guide-creating` for more information.

    *Data*

    Training data for a regression model takes the form of two
    two-dimensional arrays. The shapes of both of these arrays are
    (num-examples, num-variables) -- the first axis enumerates data points
    in a batch, and the second enumerates the relevant variables (input
    variables for the input array, and output variables for the output
    array).

    For instance, to create a training dataset containing 1000 examples:

    >>> inputs = np.random.randn(1000, 10).astype('f')
    >>> outputs = np.random.randn(1000, 3).astype('f')

    *Training*

    Training the model can be as simple as calling the :func:`train()
    <theanets.graph.Network.train>` method, with the inputs and target
    outputs as data:

    >>> model.train([inputs, outputs])

    See :ref:`guide-training` for more information.

    *Use*

    A regression model can be used to :func:`predict()
    <theanets.graph.Network.predict>` the output of some input data points:

    >>> test = np.random.randn(3, 10).astype('f')
    >>> print(model.predict(test))

    See :ref:`guide-using` for more information.

    Notes
    -----

    Regressor models default to a :class:`MSE
    <theanets.losses.MeanSquaredError>` loss. To use a different loss,
    provide a non-default argument for the ``loss`` keyword argument when
    constructing your model.
    '''
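

# Sketch: like the autoencoder, a trained Regressor can be scored with the
# R^2 coefficient of determination via Network.score():
#
# >>> model = theanets.Regressor([10, 20, 3])
# >>> model.train([inputs, outputs])
# >>> model.score(inputs, outputs)   # 1.0 would be a perfect fit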


class Classifier(graph.Network):
    '''A classifier computes a distribution over labels, given an input.

    Examples
    --------

    To create a classification model, just create a new class instance.
    Often you'll provide the layer configuration at this time:

    >>> model = theanets.Classifier([10, (20, 'tanh'), 50])

    See :ref:`guide-creating` for more information.

    *Data*

    Training data for a classification model takes the form of a
    two-dimensional array of input data and a one-dimensional vector of
    target labels. The input array has a shape (num-examples,
    num-variables): the first axis enumerates data points in a batch, and
    the second enumerates the input variables in the model.

    The second array provides the target class labels for the inputs. Its
    shape is (num-examples, ), and each integer value in the array gives the
    class label for the corresponding input example.

    For instance, to create a training dataset containing 1000 examples:

    >>> inputs = np.random.randn(1000, 10).astype('f')
    >>> outputs = np.random.randint(50, size=1000).astype('i')

    *Training*

    Training the model can be as simple as calling the :func:`train()
    <theanets.graph.Network.train>` method, giving the inputs and target
    outputs as a dataset:

    >>> model.train([inputs, outputs])

    See :ref:`guide-training` for more information.

    *Use*

    A classification model can be used to :func:`predict()
    <theanets.graph.Network.predict>` the output of some input data points:

    >>> test = np.random.randn(3, 10).astype('f')
    >>> print(model.predict(test))

    This method returns a vector containing the most likely class for each
    input example. To retrieve the probabilities of the classes for each
    example, use :func:`predict_proba()
    <theanets.feedforward.Classifier.predict_proba>`:

    >>> model.predict_proba(test).shape
    (3, 50)

    See also :ref:`guide-using` for more information.

    Notes
    -----

    Classifier models default to a :class:`cross-entropy
    <theanets.losses.CrossEntropy>` loss. To use a different loss, provide a
    non-default argument for the ``loss`` keyword argument when constructing
    your model.
    '''

    DEFAULT_OUTPUT_ACTIVATION = 'softmax'
    '''Default activation for the output layer.'''

    OUTPUT_NDIM = 1
    '''Number of dimensions for holding output data arrays.'''

    def __init__(self, layers, loss='xe', weighted=False, rng=13):
        super(Classifier, self).__init__(
            layers, loss=loss, weighted=weighted, rng=rng)
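
    # Sketch of a weighted classifier; the trailing per-example weight array
    # is an assumption based on theanets' weighted-loss data convention:
    #
    # >>> model = theanets.Classifier([10, 20, 5], weighted=True)
    # >>> inputs = np.random.randn(1000, 10).astype('f')
    # >>> labels = np.random.randint(5, size=1000).astype('i')
    # >>> model.train([inputs, labels, np.ones(1000, 'f')])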

    def monitors(self, **kwargs):
        '''Return expressions that should be computed to monitor training.

        Returns
        -------
        monitors : list of (name, expression) pairs
            A list of named monitor expressions to compute for this network.
        '''
        monitors = super(Classifier, self).monitors(**kwargs)
        regs = regularizers.from_kwargs(self, **kwargs)
        outputs, _ = self.build_graph(regs)
        return monitors + [('acc', self.losses[0].accuracy(outputs))]
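
    # Sketch: the 'acc' accuracy monitor added here should appear alongside
    # the loss monitors in the dictionaries yielded by itertrain():
    #
    # >>> for train, valid in model.itertrain([inputs, labels]):
    # ...     print(train['acc'])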

    def predict(self, x, **kwargs):
        '''Compute a greedy classification for the given set of data.

        Parameters
        ----------
        x : ndarray (num-examples, num-variables)
            An array containing examples to classify. Examples are given as
            the rows in this array.

        Returns
        -------
        k : ndarray (num-examples, )
            A vector of class index values, one per row of input data.
        '''
        outputs = self.feed_forward(x, **kwargs)
        return outputs[self.layers[-1].output_name].argmax(axis=-1)
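
    # Sketch: predict() is the argmax over the class posterior, so it should
    # agree with predict_proba():
    #
    # >>> test = np.random.randn(3, 10).astype('f')
    # >>> (model.predict(test) ==
    # ...  model.predict_proba(test).argmax(axis=-1)).all()
    # True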

    def classify(self, x, **kwargs):
        '''Deprecated alias for :func:`predict`.'''
        warnings.warn('please use predict() instead of classify()',
                      DeprecationWarning)
        return self.predict(x, **kwargs)

    def predict_proba(self, x, **kwargs):
        '''Compute class posterior probabilities for the given set of data.

        Parameters
        ----------
        x : ndarray (num-examples, num-variables)
            An array containing examples to predict. Examples are given as
            the rows in this array.

        Returns
        -------
        p : ndarray (num-examples, num-classes)
            An array of class posterior probability values, one per row of
            input data.
        '''
        return self.feed_forward(x, **kwargs)[self.layers[-1].output_name]
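
    # Sketch: with the default softmax output, each row of predict_proba()
    # forms a distribution over the classes:
    #
    # >>> p = model.predict_proba(test)
    # >>> np.allclose(p.sum(axis=-1), 1)
    # True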

    def predict_logit(self, x, **kwargs):
        '''Compute the logit values that underlie the softmax output.

        Parameters
        ----------
        x : ndarray (num-examples, num-variables)
            An array containing examples to classify. Examples are given as
            the rows in this array.

        Returns
        -------
        l : ndarray (num-examples, num-classes)
            An array of posterior class logit values, one row of logit
            values per row of input data.
        '''
        return self.feed_forward(x, **kwargs)[self.layers[-1].full_name('pre')]
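
    # Sketch: applying a numerically stable softmax to the logits should
    # recover predict_proba() up to floating-point error:
    #
    # >>> l = model.predict_logit(test)
    # >>> e = np.exp(l - l.max(axis=-1, keepdims=True))
    # >>> np.allclose(e / e.sum(axis=-1, keepdims=True),
    # ...             model.predict_proba(test))
    # True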

    def score(self, x, y, w=None, **kwargs):
        '''Compute the mean accuracy on a set of labeled data.

        Parameters
        ----------
        x : ndarray (num-examples, num-variables)
            An array containing examples to classify. Examples are given as
            the rows in this array.
        y : ndarray (num-examples, )
            A vector of integer class labels, one for each row of input data.
        w : ndarray (num-examples, )
            A vector of weights, one for each row of input data.

        Returns
        -------
        score : float
            The (possibly weighted) mean accuracy of the model on the data.
        '''
        eq = y == self.predict(x, **kwargs)
        if w is not None:
            return (w * eq).sum() / w.sum()
        return eq.mean()
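
    # Sketch: weights shift how much each example counts toward the mean
    # accuracy; here examples of class 0 count ten times as much:
    #
    # >>> w = np.where(labels == 0, 10.0, 1.0).astype('f')
    # >>> model.score(inputs, labels, w=w)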