Source code for theanets.recurrent

# -*- coding: utf-8 -*-

'''This module contains recurrent network models and utilities.'''

import collections
import numpy as np
import re

from . import feedforward


def batches(arrays, steps=100, batch_size=64, rng=None):
    '''Create a callable that generates samples from a dataset.

    Parameters
    ----------
    arrays : list of ndarray (time-steps, data-dimensions)
        Arrays of data. Rows in these arrays are assumed to correspond to time
        steps, and columns to variables. Multiple arrays can be given; in such
        a case, these arrays usually correspond to [input, output]---for
        example, for a recurrent regression problem---or [input, output,
        weights]---for a weighted regression or classification problem.
    steps : int, optional
        Generate samples of this many time steps. Defaults to 100.
    batch_size : int, optional
        Generate this many samples per call. Defaults to 64. This must match
        the batch_size parameter that was used when creating the recurrent
        network that will process the data.
    rng : :class:`numpy.random.RandomState` or int, optional
        A random number generator, or an integer seed for a random number
        generator. If not provided, the random number generator will be
        created with an automatically chosen seed.

    Returns
    -------
    callable :
        A callable that can be used inside a dataset for training a recurrent
        network.
    '''
    assert batch_size >= 2, 'batch_size must be at least 2!'
    assert isinstance(arrays, (tuple, list)), 'arrays must be a tuple or list!'

    if rng is None or isinstance(rng, int):
        rng = np.random.RandomState(rng)

    def sample():
        xs = [np.zeros((batch_size, steps, a.shape[1]), a.dtype)
              for a in arrays]
        for i in range(batch_size):
            j = rng.randint(len(arrays[0]) - steps)
            for x, a in zip(xs, arrays):
                x[i] = a[j:j+steps]
        return xs

    return sample
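
# Usage sketch (not part of the original module): the names and sizes below
# are made up to illustrate the shapes of the mini-batches produced by the
# callable that ``batches`` returns.
def _example_batches():
    # Two long, aligned recordings: rows are time steps, columns are variables.
    inp = np.random.randn(10000, 10).astype('f')
    out = np.random.randn(10000, 3).astype('f')
    sample = batches([inp, out], steps=50, batch_size=32, rng=13)
    x, y = sample()
    # x.shape == (32, 50, 10) and y.shape == (32, 50, 3): one window of 50
    # consecutive time steps per example, 32 examples per call.
    return x.shape, y.shape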

class Text(object):
    '''A class for handling sequential text data.

    Parameters
    ----------
    text : str
        A blob of text.
    alpha : str, optional
        An alphabet to use for representing characters in the text. If not
        provided, all characters from the text occurring at least
        ``min_count`` times will be used.
    min_count : int, optional
        If the alphabet is to be computed from the text, discard characters
        that occur fewer than this number of times. Defaults to 2.
    unknown : str, optional
        A character to use to represent "out-of-alphabet" characters in the
        text. This must not be in the alphabet. Defaults to '\0'.

    Attributes
    ----------
    text : str
        A blob of text, with all non-alphabet characters replaced by the
        "unknown" character.
    alpha : str
        A string containing each character in the alphabet.
    '''

    def __init__(self, text, alpha=None, min_count=2, unknown='\0'):
        self.alpha = alpha
        if self.alpha is None:
            self.alpha = ''.join(sorted(set(
                char for char, count in collections.Counter(text).items()
                if char != unknown and count >= min_count)))
        self.text = re.sub(
            r'[^{}]'.format(re.escape(self.alpha)), unknown, text)
        assert unknown not in self.alpha
        self._rev_index = unknown + self.alpha
        self._fwd_index = dict(
            zip(self._rev_index, range(1 + len(self.alpha))))

    def encode(self, txt):
        '''Encode a text string by replacing characters with alphabet index.

        Parameters
        ----------
        txt : str
            A string to encode.

        Returns
        -------
        classes : list of int
            A sequence of alphabet index values corresponding to the given
            text.
        '''
        return list(self._fwd_index.get(c, 0) for c in txt)

    def decode(self, enc):
        '''Decode a sequence of alphabet index values back into a text string.

        Parameters
        ----------
        enc : list of int
            A sequence of alphabet index values to convert to text.

        Returns
        -------
        txt : str
            A string containing corresponding characters from the alphabet.
        '''
        return ''.join(self._rev_index[c] for c in enc)

    def classifier_batches(self, steps, batch_size, rng=None):
        '''Create a callable that returns a batch of training data.

        Parameters
        ----------
        steps : int
            Number of time steps in each batch.
        batch_size : int
            Number of training examples per batch.
        rng : :class:`numpy.random.RandomState` or int, optional
            A random number generator, or an integer seed for a random number
            generator. If not provided, the random number generator will be
            created with an automatically chosen seed.

        Returns
        -------
        batch : callable
            A callable that, when called, returns a batch of data that can be
            used to train a classifier model.
        '''
        assert batch_size >= 2, 'batch_size must be at least 2!'

        if rng is None or isinstance(rng, int):
            rng = np.random.RandomState(rng)

        T = np.arange(steps)

        def batch():
            inputs = np.zeros((batch_size, steps, 1 + len(self.alpha)), 'f')
            outputs = np.zeros((batch_size, steps), 'i')
            for b in range(batch_size):
                offset = rng.randint(len(self.text) - steps - 1)
                enc = self.encode(self.text[offset:offset + steps + 1])
                inputs[b, T, enc[:-1]] = 1
                outputs[b, T] = enc[1:]
            return [inputs, outputs]

        return batch
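
# Usage sketch (not part of the original module): wrap a corpus string in a
# Text helper, round-trip it through encode/decode, and draw one
# classification batch. The corpus is made up for illustration.
def _example_text():
    corpus = Text('the cat sat on the mat. ' * 200)
    enc = corpus.encode('the cat')
    assert corpus.decode(enc) == 'the cat'
    batch = corpus.classifier_batches(steps=20, batch_size=16, rng=42)
    inputs, outputs = batch()
    # inputs.shape == (16, 20, 1 + len(corpus.alpha)) holds one-hot characters;
    # outputs.shape == (16, 20) holds the label of the next character at each
    # time step.
    return inputs.shape, outputs.shape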

class Autoencoder(feedforward.Autoencoder):
    '''An autoencoder network attempts to reproduce its input.

    Examples
    --------
    To create a recurrent autoencoder, just create a new model instance. Often
    you'll provide the layer configuration at this time:

    >>> model = theanets.recurrent.Autoencoder([10, (20, 'rnn'), 10])

    See :ref:`guide-creating` for more information.

    *Data*

    Training data for a recurrent autoencoder takes the form of a
    three-dimensional array. The shape of this array is (num-examples,
    num-time-steps, num-variables): the first axis enumerates data points in
    a batch, the second enumerates time steps, and the third enumerates the
    variables in the model.

    For instance, to create a training dataset containing 1000 examples, each
    with 100 time steps:

    >>> inputs = np.random.randn(1000, 100, 10).astype('f')

    *Training*

    Training the model can be as simple as calling the :func:`train()
    <theanets.graph.Network.train>` method:

    >>> model.train([inputs])

    See :ref:`guide-training` for more information.

    *Use*

    A model can be used to :func:`predict()
    <theanets.graph.Network.predict>` the output of some input data points:

    >>> test = np.random.randn(3, 200, 10).astype('f')
    >>> print(model.predict(test))

    Note that the test data does not need to have the same number of time
    steps as the training data.

    Additionally, autoencoders can :func:`encode()
    <theanets.feedforward.Autoencoder.encode>` a set of input data points:

    >>> enc = model.encode(test)

    See :ref:`guide-using` for more information.

    Notes
    -----

    Autoencoder models default to a :class:`MSE
    <theanets.losses.MeanSquaredError>` loss. To use a different loss, provide
    a non-default argument for the ``loss`` keyword argument when constructing
    your model.
    '''

    INPUT_NDIM = 3
    '''Number of dimensions for holding input data arrays.'''

    OUTPUT_NDIM = 3
    '''Number of dimensions for holding output data arrays.'''

class Regressor(feedforward.Regressor):
    '''A regressor attempts to produce a target output given some inputs.

    Examples
    --------
    To create a recurrent regression model, just create a new class instance.
    Often you'll provide the layer configuration at this time:

    >>> model = theanets.recurrent.Regressor([10, (20, 'rnn'), 3])

    See :ref:`guide-creating` for more information.

    *Data*

    Training data for a recurrent regression model takes the form of two
    three-dimensional arrays. The shapes of these arrays are (num-examples,
    num-time-steps, num-variables): the first axis enumerates data points in
    a batch, the second enumerates time steps, and the third enumerates the
    variables (input variables for the input array, and output variables for
    the output array) in the model.

    For instance, to create a training dataset containing 1000 examples, each
    with 100 time steps:

    >>> inputs = np.random.randn(1000, 100, 10).astype('f')
    >>> outputs = np.random.randn(1000, 100, 3).astype('f')

    *Training*

    Training the model can be as simple as calling the :func:`train()
    <theanets.graph.Network.train>` method:

    >>> model.train([inputs, outputs])

    See :ref:`guide-training` for more information.

    *Use*

    A model can be used to :func:`predict()
    <theanets.graph.Network.predict>` the output of some input data points:

    >>> test = np.random.randn(3, 200, 10).astype('f')
    >>> print(model.predict(test))

    Note that the test data does not need to have the same number of time
    steps as the training data.

    See :ref:`guide-using` for more information.

    Notes
    -----

    Regressor models default to a :class:`MSE
    <theanets.losses.MeanSquaredError>` loss. To use a different loss, provide
    a non-default argument for the ``loss`` keyword argument when constructing
    your model.
    '''

    INPUT_NDIM = 3
    '''Number of dimensions for holding input data arrays.'''

    OUTPUT_NDIM = 3
    '''Number of dimensions for holding output data arrays.'''
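
# Usage sketch (not part of the original module): per the ``batches``
# docstring above, the callable it returns can serve as a training dataset,
# so long recordings do not need to be cut into fixed-length examples by
# hand. Layer sizes and array shapes here are assumptions made up for
# illustration; note that the batch_size passed to ``batches`` may need to
# match the batch size expected by the network.
def _example_regressor_training():
    inp = np.random.randn(10000, 10).astype('f')
    out = np.random.randn(10000, 3).astype('f')
    model = Regressor([10, (20, 'rnn'), 3])
    model.train(batches([inp, out], steps=50, batch_size=32))
    return model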

class Classifier(feedforward.Classifier):
    '''A classifier computes a distribution over labels, given an input.

    Examples
    --------
    To create a recurrent classification model, just create a new class
    instance. Often you'll provide the layer configuration at this time:

    >>> model = theanets.recurrent.Classifier([10, (20, 'rnn'), 50])

    See :ref:`guide-creating` for more information.

    *Data*

    Training data for a recurrent classification model takes the form of two
    three-dimensional arrays. The first array provides the input data for the
    model. Its shape is (num-examples, num-time-steps, num-variables): the
    first axis enumerates data points in a batch, the second enumerates time
    steps, and the third enumerates the input variables in the model. The
    second array provides the target class labels for the inputs. Its shape
    is (num-examples, num-time-steps), and each integer value in the array
    gives the class label for the corresponding input example and time step.

    For instance, to create a training dataset containing 1000 examples, each
    with 100 time steps:

    >>> inputs = np.random.randn(1000, 100, 10).astype('f')
    >>> outputs = np.random.randint(50, size=(1000, 100)).astype('i')

    *Training*

    Training the model can be as simple as calling the :func:`train()
    <theanets.graph.Network.train>` method:

    >>> model.train([inputs, outputs])

    See :ref:`guide-training` for more information.

    *Use*

    A model can be used to :func:`predict()
    <theanets.graph.Network.predict>` the output of some input data points:

    >>> test = np.random.randn(3, 200, 10).astype('f')
    >>> print(model.predict(test))

    This method returns a two-dimensional array containing the most likely
    class for each input example and time step. Note that the test data does
    not need to have the same number of time steps as the training data.

    To retrieve the probabilities of the classes for each example, use
    :func:`predict_proba() <theanets.feedforward.Classifier.predict_proba>`:

    >>> model.predict_proba(test).shape
    (3, 200, 50)

    Recurrent classifiers have a :func:`predict_sequence` helper method that
    predicts values in an ongoing sequence. Given a seed value, the model
    predicts one time step ahead, then adds the prediction to the seed,
    predicts one more step ahead, and so on:

    >>> seed = np.random.randint(50, size=10).astype('i')
    >>> print(list(model.predict_sequence(seed, 100)))

    See :class:`Text` for more utility code that is helpful for working with
    sequences of class labels. See also :ref:`guide-using` for more
    information.

    Notes
    -----

    Classifier models default to a :class:`cross-entropy
    <theanets.losses.CrossEntropy>` loss. To use a different loss, provide a
    non-default argument for the ``loss`` keyword argument when constructing
    your model.
    '''

    INPUT_NDIM = 3
    '''Number of dimensions for holding input data arrays.'''

    OUTPUT_NDIM = 2
    '''Number of dimensions for holding output data arrays.'''

    def predict_sequence(self, labels, steps, streams=1, rng=None):
        '''Draw a sequential sample of class labels from this network.

        Parameters
        ----------
        labels : list of int
            A list of integer class labels to get the classifier started.
        steps : int
            The number of time steps to sample.
        streams : int, optional
            Number of parallel streams to sample from the model. Defaults
            to 1.
        rng : :class:`numpy.random.RandomState` or int, optional
            A random number generator, or an integer seed for a random number
            generator. If not provided, the random number generator will be
            created with an automatically chosen seed.

        Yields
        ------
        label(s) : int or list of int
            Yields at each time step an integer class label sampled
            sequentially from the model. If the number of requested streams
            is greater than 1, this will be a list containing the
            corresponding number of class labels.
        '''
        if rng is None or isinstance(rng, int):
            rng = np.random.RandomState(rng)
        offset = len(labels)
        batch = max(2, streams)
        inputs = np.zeros(
            (batch, offset + steps, self.layers[0].output_size), 'f')
        inputs[:, np.arange(offset), labels] = 1
        for i in range(offset, offset + steps):
            chars = []
            # Predict from the first i time steps of every stream and look at
            # the distribution for the most recent step.
            for pdf in self.predict_proba(inputs[:, :i])[:, -1]:
                try:
                    c = rng.multinomial(1, pdf).argmax(axis=-1)
                except ValueError:
                    # sometimes the pdf triggers a normalization error. just
                    # choose greedily in this case.
                    c = pdf.argmax(axis=-1)
                chars.append(int(c))
            inputs[np.arange(batch), i, chars] = 1
            yield chars[0] if streams == 1 else chars
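
# Usage sketch (not part of the original module): a character-level model
# that ties together Text, Classifier, and predict_sequence. The corpus,
# layer sizes, and training settings are assumptions made up for
# illustration.
def _example_character_model():
    corpus = Text('the cat sat on the mat. ' * 500)
    n = 1 + len(corpus.alpha)  # one-hot width, including the "unknown" slot
    model = Classifier([n, (50, 'rnn'), n])
    # classifier_batches returns a callable usable as a training dataset.
    model.train(corpus.classifier_batches(steps=32, batch_size=16))
    # Seed the sampler with the first few characters, then generate 100 more.
    seed = corpus.encode(corpus.text[:10])
    return corpus.decode(list(model.predict_sequence(seed, 100)))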