Source code for theanets.graph

# -*- coding: utf-8 -*-

r'''This module contains a base class for modeling computation graphs.'''

import downhill
import gzip
import hashlib
import numpy as np
import pickle
import theano
import time
import warnings

from . import layers
from . import losses
from . import regularizers
from . import trainer
from . import util


class Network(object):
    '''The network class encapsulates a network computation graph.

    Notes
    -----

    Computation graphs are organized into :ref:`layers <layers>`. Each layer
    receives one or more arrays of input data, transforms them, and generates
    one or more arrays of output data. Outputs in a computation graph are
    named according to their layer and output type, so the 'pre' output of a
    layer named 'hid1' would be named 'hid1:pre'. The 'out' output is the
    default output for a layer. By default the last layer in a network is
    named 'out'.

    The parameters in a network graph are optimized by minimizing a
    :ref:`loss function <losses>` with respect to some set of training data.
    Typically the value produced by 'out:out' is compared to some target
    value, creating an error value of some sort. This error value is then
    propagated back through the computation graph to update the parameters in
    the model.

    Parameters
    ----------
    layers : int, tuple, dict, or :class:`Layer <theanets.layers.base.Layer>`
        A sequence of values specifying the layer configuration for the
        network. For more information, please see
        :ref:`guide-creating-specifying-layers`.
    loss : str or :class:`Loss <theanets.losses.Loss>`
        The name of a loss function to optimize when training this network
        model.
    weighted : bool, optional
        If True, optimize this model using a "weighted" loss. Weighted losses
        typically require an additional array as input during optimization.
        For more information, see :ref:`losses-weighted`. Defaults to False.
    rng : int or RandomState, optional
        A seed or numpy ``RandomState`` instance for generating randomness in
        the model. Defaults to 13.

    Attributes
    ----------
    layers : list of :class:`Layer <theanets.layers.base.Layer>`
        A list of the layers in this network model.
    losses : list of :class:`Loss <theanets.losses.Loss>`
        A list of losses to be computed when optimizing this network model.
    '''

    DEFAULT_OUTPUT_ACTIVATION = 'linear'
    '''Default activation for the output layer.'''

    INPUT_NDIM = 2
    '''Number of dimensions for holding input data arrays.'''

    OUTPUT_NDIM = 2
    '''Number of dimensions for holding output data arrays.'''

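    # Illustrative sketch (not part of the original source): how graph
    # outputs are named, per the naming convention described in the class
    # docstring. The layer name 'hid1' follows the default naming scheme
    # used by the constructor, and ``x`` is a hypothetical input array:
    #
    #     net = Network(layers=(4, 8, 2))
    #     outputs = net.feed_forward(x)
    #     outputs['hid1:out']   # default output of the layer named 'hid1'
    #     outputs['hid1:pre']   # 'pre'-activation output, if the layer has one
    #     outputs['out:out']    # default output of the last layer
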
    def __init__(self, layers=(), loss='mse', weighted=False, rng=13):
        self._graphs = {}     # cache of symbolic computation graphs
        self._functions = {}  # cache of callable feedforward functions
        self._rng = rng

        # create layers based on specs provided in the constructor.
        self.layers = []
        for i, layer in enumerate(layers):
            first = i == 0
            last = i == len(layers) - 1
            name = 'in' if first else 'out' if last else 'hid{}'.format(i)
            activation = self.DEFAULT_OUTPUT_ACTIVATION if last else 'relu'
            self.add_layer(layer=layer, name=name, activation=activation)

        # bind layers to this graph after construction. this finalizes layer
        # shapes and does other consistency checks based on the entire graph.
        for l in self.layers:
            l.bind(self)

        # create a default loss (usually).
        self.losses = []
        if loss and self.layers:
            self.set_loss(loss, weighted=weighted,
                          target=self.OUTPUT_NDIM,
                          output_name=self.layers[-1].output_name)

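    # Illustrative sketch (not part of the original source): the constructor
    # names layers 'in', 'hid1', 'hid2', ..., 'out', applying 'relu' to
    # hidden layers and DEFAULT_OUTPUT_ACTIVATION ('linear') to the last:
    #
    #     net = Network(layers=(10, 20, 20, 3))
    #     [l.name for l in net.layers]   # -> ['in', 'hid1', 'hid2', 'out']
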
    def add_layer(self, layer=None, **kwargs):
        '''Add a :ref:`layer <layers>` to our network graph.

        Parameters
        ----------
        layer : int, tuple, dict, or :class:`Layer <theanets.layers.base.Layer>`
            A value specifying the layer to add. For more information, please
            see :ref:`guide-creating-specifying-layers`.
        '''
        # if the given layer is a Layer instance, just add it and move on.
        if isinstance(layer, layers.Layer):
            self.layers.append(layer)
            return

        form = kwargs.pop('form', 'ff' if self.layers else 'input').lower()

        if isinstance(layer, util.basestring):
            if not layers.Layer.is_registered(layer):
                raise util.ConfigurationError(
                    'unknown layer type: {}'.format(layer))
            form = layer
            layer = None

        # if layer is a tuple/list of integers, assume it's a shape.
        if isinstance(layer, (tuple, list)) and \
                all(isinstance(x, int) for x in layer):
            kwargs['shape'] = tuple(layer)
            layer = None

        # if layer is some other tuple/list, assume it's a list of:
        # - the name of a layers.Layer class (str)
        # - the name of an activation function (str)
        # - the number of units in the layer (int)
        if isinstance(layer, (tuple, list)):
            for el in layer:
                if isinstance(el, util.basestring) and \
                        layers.Layer.is_registered(el):
                    form = el
                elif isinstance(el, util.basestring):
                    kwargs['activation'] = el
                elif isinstance(el, int):
                    if 'size' in kwargs:
                        raise util.ConfigurationError(
                            'duplicate layer sizes! {}'.format(kwargs))
                    kwargs['size'] = el
            layer = None

        # if layer is a dictionary, try to extract a form for the layer, and
        # override our default keyword arguments with the rest.
        if isinstance(layer, dict):
            for key, value in layer.items():
                if key == 'form':
                    form = value.lower()
                else:
                    kwargs[key] = value
            layer = None

        # if neither shape nor size have been specified yet, check that the
        # "layer" param is an int and use it for "size".
        if 'shape' not in kwargs and 'size' not in kwargs and \
                isinstance(layer, int):
            kwargs['size'] = layer

        # if it hasn't been provided in some other way yet, set input
        # dimensionality based on the model.
        if form == 'input' and 'shape' not in kwargs:
            kwargs.setdefault('ndim', self.INPUT_NDIM)

        # set some default layer parameters.
        if form != 'input':
            kwargs.setdefault('inputs', self.layers[-1].output_name)
        kwargs.setdefault('rng', self._rng)

        if form.lower() == 'tied' and 'partner' not in kwargs:
            # we look backward through our list of layers for a partner.
            # any "tied" layer that we find increases a counter by one,
            # and any "untied" layer decreases the counter by one. our
            # partner is the first layer we find with count zero.
            #
            # this is intended to handle the hopefully common case of a
            # (possibly deep) tied-weights autoencoder.
            tied = 1
            partner = None
            for l in self.layers[::-1]:
                tied += 1 if isinstance(l, layers.Tied) else -1
                if tied == 0:
                    partner = l.name
                    break
            else:
                raise util.ConfigurationError(
                    'cannot find partner for "{}"'.format(kwargs))
            kwargs['partner'] = partner

        layer = layers.Layer.build(form, **kwargs)

        # check that graph inputs have unique names.
        if isinstance(layer, layers.Input):
            if any(layer.name == i.name for i in self.inputs):
                raise util.ConfigurationError(
                    '"{}": duplicate input name!'.format(layer.name))

        self.layers.append(layer)

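    # Illustrative sketch (not part of the original source): the
    # specification forms that the parsing logic above accepts, shown as
    # constructor arguments (the constructor forwards each spec to
    # add_layer). The activation names are examples only:
    #
    #     net = Network(layers=(
    #         4,                         # int -> size
    #         (8, 'tanh'),               # size plus activation name
    #         ('ff', 'relu', 8),         # registered form + activation + size
    #         dict(form='ff', size=2),   # dict with a 'form' key
    #     ))
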
    def add_loss(self, loss=None, **kwargs):
        '''Add a :ref:`loss function <losses>` to the model.

        Parameters
        ----------
        loss : str, dict, or :class:`theanets.losses.Loss`
            A loss function to add. If this is a Loss instance, it will be
            added immediately. If this is a string, it names a loss function
            to build and add. If it is a dictionary, it should contain a
            ``'form'`` key whose string value names the loss function to add.
            Other arguments will be passed to
            :func:`theanets.losses.Loss.build`.
        '''
        if isinstance(loss, losses.Loss):
            self.losses.append(loss)
            return
        form = loss or 'mse'
        if 'form' in kwargs:
            form = kwargs.pop('form').lower()
        kw = dict(target=self.OUTPUT_NDIM,
                  output_name=self.layers[-1].output_name)
        kw.update(kwargs)
        if isinstance(loss, dict):
            loss = dict(loss)
            if 'form' in loss:
                form = loss.pop('form').lower()
            kw.update(loss)
        self.losses.append(losses.Loss.build(form, **kw))

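    # Illustrative sketch (not part of the original source): the three ways
    # of specifying a loss, per the dispatch above. The loss names ('xe',
    # 'mae') and the 'weight' keyword are assumed to be understood by
    # losses.Loss.build:
    #
    #     net.add_loss('xe')                          # by name
    #     net.add_loss(dict(form='mse', weight=0.5))  # dict with 'form' key
    #     net.add_loss(losses.Loss.build(
    #         'mae', target=2, output_name='out:out'))  # prebuilt instance
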
    def set_loss(self, *args, **kwargs):
        '''Clear the current loss functions from the network and add a new one.

        All parameters and keyword arguments are passed to :func:`add_loss`
        after clearing the current losses.
        '''
        self.losses = []
        self.add_loss(*args, **kwargs)

    def itertrain(self, train, valid=None, algo='rmsprop', subalgo='rmsprop',
                  save_every=0, save_progress=None, **kwargs):
        '''Train our network, one batch at a time.

        This method yields a series of ``(train, valid)`` monitor pairs. The
        ``train`` value is a dictionary mapping names to monitor values
        evaluated on the training dataset. The ``valid`` value is also a
        dictionary mapping names to values, but these values are evaluated on
        the validation dataset.

        Because validation might not occur every training iteration, the
        validation monitors might be repeated for multiple training
        iterations. It is probably most helpful to think of the validation
        monitors as being the "most recent" values that have been computed.

        After training completes, the parameters in this network will have
        been updated to their trained values.

        Parameters
        ----------
        train : :class:`Dataset <downhill.dataset.Dataset>` or list
            A dataset to use when training the network. If this is a
            ``downhill.Dataset`` instance, it will be used directly as the
            training dataset. If it is a list of numpy arrays or a list of
            callables, it will be converted to a ``downhill.Dataset`` and
            then used as the training set.
        valid : :class:`Dataset <downhill.dataset.Dataset>` or list, optional
            If this is provided, it will be used as a validation dataset. If
            not provided, the training set will be used for validation.
            (This is not recommended!)
        algo : str, optional
            An optimization algorithm to use for training our network. If
            not provided, :class:`RMSProp <downhill.adaptive.RMSProp>` will
            be used.
        subalgo : str, optional
            An optimization algorithm to use for a trainer that requires a
            "sub-algorithm," such as an unsupervised pretrainer. Defaults to
            :class:`RMSProp <downhill.adaptive.RMSProp>`.
        save_every : int or float, optional
            If this is nonzero and ``save_progress`` is not None, then the
            model being trained will be saved periodically. If this is a
            float, it is treated as a number of minutes to wait between
            saves. If it is an int, it is treated as the number of training
            epochs to wait between saves. Defaults to 0.
        save_progress : str or file handle, optional
            If this is not None, and ``save_every`` is nonzero, then save the
            model periodically during training. This parameter gives either
            (a) the full path of a file to save the model, or (b) a file-like
            object where the model should be saved. If it is a string and the
            given name contains a "{}" format specifier, it will be filled
            with the integer Unix timestamp at the time the model is saved.
            Defaults to None, which does not save models.

        Yields
        ------
        training : dict
            A dictionary of monitor values computed using the training
            dataset, at each iteration of training. This dictionary will at
            least contain a 'loss' key that indicates the value of the loss
            function. Other keys may be available depending on the trainer
            being used.
        validation : dict
            A dictionary of monitor values computed using the validation
            dataset, at each iteration of training.
        '''
        if 'rng' not in kwargs:
            kwargs['rng'] = self._rng

        def create_dataset(data, **kwargs):
            name = kwargs.get('name', 'dataset')
            s = '{}_batches'.format(name)
            return downhill.Dataset(
                data,
                name=name,
                batch_size=kwargs.get('batch_size', 32),
                iteration_size=kwargs.get('iteration_size', kwargs.get(s)),
                axis=kwargs.get('axis', 0),
                rng=kwargs['rng'])

        # set up datasets ...
        if valid is None:
            valid = train
        if not isinstance(valid, downhill.Dataset):
            valid = create_dataset(valid, name='valid', **kwargs)
        if not isinstance(train, downhill.Dataset):
            train = create_dataset(train, name='train', **kwargs)

        if 'algorithm' in kwargs:
            warnings.warn(
                'please use the "algo" keyword arg instead of "algorithm"',
                DeprecationWarning)
            algo = kwargs.pop('algorithm')
            if isinstance(algo, (list, tuple)):
                algo = algo[0]

        # set up trainer ...
        if isinstance(algo, util.basestring):
            algo = algo.lower()
            if algo == 'sample':
                algo = trainer.SampleTrainer(self)
            elif algo.startswith('layer') or algo.startswith('sup'):
                algo = trainer.SupervisedPretrainer(subalgo, self)
            elif algo.startswith('pre') or algo.startswith('unsup'):
                algo = trainer.UnsupervisedPretrainer(subalgo, self)
            else:
                algo = trainer.DownhillTrainer(algo, self)

        # set up check to save model ...
        def needs_saving(elapsed, iteration):
            if save_progress is None:
                return False
            if isinstance(save_every, float):
                return elapsed > 60 * save_every
            if isinstance(save_every, int):
                return iteration % save_every == 0
            return False

        # train it!
        start = time.time()
        for i, monitors in enumerate(algo.itertrain(train, valid, **kwargs)):
            yield monitors
            now = time.time()
            if i and needs_saving(now - start, i):
                filename_or_handle = save_progress
                if isinstance(filename_or_handle, util.basestring):
                    filename_or_handle = save_progress.format(int(now))
                self.save(filename_or_handle)
                start = now

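    # Illustrative sketch (not part of the original source): consuming the
    # per-iteration monitor pairs. ``x`` and ``y`` are hypothetical numpy
    # arrays shaped (num-examples, num-inputs) and (num-examples,
    # num-outputs); 'patience' is assumed to be a keyword understood by the
    # downhill trainer:
    #
    #     for tm, vm in net.itertrain([x, y], algo='rmsprop', patience=10):
    #         print('train loss:', tm['loss'], 'valid loss:', vm['loss'])
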
    def train(self, *args, **kwargs):
        '''Train the network until the trainer converges.

        All arguments are passed to :func:`itertrain`.

        Returns
        -------
        training : dict
            A dictionary of monitor values computed using the training
            dataset, at the conclusion of training. This dictionary will at
            least contain a 'loss' key that indicates the value of the loss
            function. Other keys may be available depending on the trainer
            being used.
        validation : dict
            A dictionary of monitor values computed using the validation
            dataset, at the conclusion of training.
        '''
        monitors = None
        for monitors in self.itertrain(*args, **kwargs):
            pass
        return monitors

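    # Illustrative sketch (not part of the original source): training to
    # convergence while saving a snapshot every 10 epochs; the "{}" in the
    # hypothetical filename is filled with a Unix timestamp, as described in
    # itertrain:
    #
    #     tm, vm = net.train([x, y], algo='nag',
    #                        save_progress='model-{}.pkl.gz', save_every=10)
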
    def _hash(self, regularizers=()):
        '''Construct a string key for representing a computation graph.

        This key will be unique for a given (a) network topology, (b) set of
        losses, and (c) set of regularizers.

        Returns
        -------
        key : str
            A hash representing the computation graph for the current
            network.
        '''
        def add(s):
            h.update(str(s).encode('utf-8'))
        h = hashlib.md5()
        for l in self.layers:
            add('{}{}{}'.format(l.__class__.__name__, l.name, l.output_shape))
        for l in self.losses:
            add('{}{}'.format(l.__class__.__name__, l.weight))
        for r in regularizers:
            add('{}{}{}'.format(r.__class__.__name__, r.weight, r.pattern))
        return h.hexdigest()

    def build_graph(self, regularizers=()):
        '''Connect the layers in this network to form a computation graph.

        Parameters
        ----------
        regularizers : list of :class:`theanets.regularizers.Regularizer`
            A list of the regularizers to apply while building the
            computation graph.

        Returns
        -------
        outputs : dict of Theano expressions
            A dictionary mapping output names to expressions giving the
            output of each layer in the graph.
        updates : list of update tuples
            A list of updates that should be performed by a Theano function
            that computes something using this graph.
        '''
        key = self._hash(regularizers)
        if key not in self._graphs:
            util.log('building computation graph')
            for loss in self.losses:
                loss.log()
            for reg in regularizers:
                reg.log()
            outputs = {}
            updates = []
            for layer in self.layers:
                out, upd = layer.connect(outputs)
                for reg in regularizers:
                    reg.modify_graph(out)
                outputs.update(out)
                updates.extend(upd)
            self._graphs[key] = outputs, updates
        return self._graphs[key]

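    # Illustrative sketch (not part of the original source): the outputs
    # dictionary is keyed by 'layer:output' names, and the updates can be
    # handed to a Theano function directly:
    #
    #     outputs, updates = net.build_graph()
    #     f = theano.function(net.inputs, outputs['out:out'], updates=updates)
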
    @property
    def inputs(self):
        '''A list of Theano variables for feedforward computations.'''
        return [l.input for l in self.layers if isinstance(l, layers.Input)]

    @property
    def variables(self):
        '''A list of Theano variables for loss computations.'''
        result = self.inputs
        seen = set(i.name for i in result)
        for loss in self.losses:
            for v in loss.variables:
                if v.name not in seen:
                    result.append(v)
                    seen.add(v.name)
        return result

    @property
    def params(self):
        '''A list of the learnable Theano parameters for this network.'''
        return [p for l in self.layers for p in l.params]

    def find(self, which, param):
        '''Get a parameter from a layer in the network.

        Parameters
        ----------
        which : int or str
            The layer that owns the parameter to return. If this is an
            integer, then 0 refers to the input layer, 1 refers to the first
            hidden layer, 2 to the second, and so on. If this is a string,
            the layer with the corresponding name, if any, will be used.
        param : int or str
            Name of the parameter to retrieve from the specified layer, or
            its index in the parameter list of the layer.

        Raises
        ------
        KeyError
            If there is no such layer, or if there is no such parameter in
            the specified layer.

        Returns
        -------
        param : Theano shared variable
            A shared parameter variable from the indicated layer.
        '''
        for i, layer in enumerate(self.layers):
            if which == i or which == layer.name:
                return layer.find(param)
        raise KeyError(which)

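    # Illustrative sketch (not part of the original source): both calls
    # retrieve the same parameter from the first hidden layer, assuming the
    # layer holds a parameter named 'w' (the conventional theanets name for
    # a weight matrix):
    #
    #     net.find(1, 'w')        # by layer index
    #     net.find('hid1', 'w')   # by layer name
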
    def feed_forward(self, x, **kwargs):
        '''Compute a forward pass of all layers from the given input.

        All keyword arguments are passed directly to :func:`build_graph`.

        Parameters
        ----------
        x : ndarray (num-examples, num-variables)
            An array containing data to be fed into the network. Multiple
            examples are arranged as rows in this array, with columns
            containing the variables for each example.

        Returns
        -------
        outputs : dict of ndarray (num-examples, num-units)
            A dictionary mapping output names to the activation values of
            each layer in the network when given input `x`. For each of the
            hidden layers, an array is returned containing one row per input
            example; the columns of each array correspond to units in the
            respective layer. The "output" of the network is stored under
            the output name of the last layer.
        '''
        regs = regularizers.from_kwargs(self, **kwargs)
        key = self._hash(regs)
        if key not in self._functions:
            outputs, updates = self.build_graph(regs)
            labels, exprs = list(outputs.keys()), list(outputs.values())
            util.log('compiling feed_forward function')
            self._functions[key] = (labels, theano.function(
                self.inputs, exprs, updates=updates))
        labels, f = self._functions[key]
        return dict(zip(labels, f(x)))

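    # Illustrative sketch (not part of the original source): pulling hidden
    # activations out of the returned dictionary, with ``x`` a hypothetical
    # input array:
    #
    #     acts = net.feed_forward(x)
    #     hidden = acts['hid1:out']                  # (num-examples, num-units)
    #     output = acts[net.layers[-1].output_name]  # network output
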
    def predict(self, x, **kwargs):
        '''Compute a forward pass of the inputs, returning the network output.

        All keyword arguments end up being passed to :func:`build_graph`.

        Parameters
        ----------
        x : ndarray (num-examples, num-variables)
            An array containing data to be fed into the network. Multiple
            examples are arranged as rows in this array, with columns
            containing the variables for each example.

        Returns
        -------
        y : ndarray (num-examples, num-variables)
            Returns the values of the network output units when given input
            `x`. Rows in this array correspond to examples, and columns to
            output variables.
        '''
        return self.feed_forward(x, **kwargs)[self.layers[-1].output_name]

    def score(self, x, y, w=None, **kwargs):
        '''Compute R^2 coefficient of determination for a given labeled input.

        Parameters
        ----------
        x : ndarray (num-examples, num-inputs)
            An array containing data to be fed into the network. Multiple
            examples are arranged as rows in this array, with columns
            containing the variables for each example.
        y : ndarray (num-examples, num-outputs)
            An array containing expected target data for the network.
            Multiple examples are arranged as rows in this array, with
            columns containing the variables for each example.
        w : ndarray (num-examples, num-outputs), optional
            An array of weights to apply to the squared error terms. If not
            provided, all terms are weighted uniformly.

        Returns
        -------
        r2 : float
            The R^2 correlation between the prediction of this network and
            its target output.
        '''
        u = y - self.predict(x, **kwargs)
        v = y - y.mean()
        if w is None:
            w = np.ones_like(u)
        return 1 - (w * u * u).sum() / (w * v * v).sum()

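    # The computation above is the weighted coefficient of determination,
    # with y_hat = net.predict(x) and y_bar = y.mean():
    #
    #     R^2 = 1 - sum(w * (y - y_hat)**2) / sum(w * (y - y_bar)**2)
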
    def __getstate__(self):
        return (self.layers, self.losses)

    def __setstate__(self, state):
        self.layers, self.losses = state
        self._graphs = {}
        self._functions = {}
        self._rng = 13  # the rng seed is not pickled; restore the default.

    def save(self, filename_or_handle):
        '''Save the state of this network to a pickle file on disk.

        Parameters
        ----------
        filename_or_handle : str or file handle
            Save the state of this network to a pickle file. If this
            parameter is a string, it names the file where the pickle will be
            saved. If it is a file-like object, this object will be used for
            writing the pickle. If the filename ends in ".gz" then the output
            will automatically be gzipped.
        '''
        if isinstance(filename_or_handle, util.basestring):
            opener = gzip.open if filename_or_handle.lower().endswith('.gz') \
                else open
            handle = opener(filename_or_handle, 'wb')
        else:
            handle = filename_or_handle
        pickle.dump(self, handle, -1)
        if isinstance(filename_or_handle, util.basestring):
            handle.close()
            util.log('saved model to {}', filename_or_handle)

    @classmethod
    def load(cls, filename_or_handle):
        '''Load a saved network from disk.

        Parameters
        ----------
        filename_or_handle : str or file handle
            Load the state of a network from a pickle file. If this parameter
            is a string, it names the file where the pickle is stored. If it
            is a file-like object, this object will be used for reading the
            pickle. If the filename ends in ".gz" then the input will
            automatically be gunzipped.

        Returns
        -------
        network : :class:`Network`
            A network instance with the topology and parameters stored in
            the pickle.
        '''
        assert not isinstance(cls, Network), \
            'cannot load an instance! say instead: net = Network.load(source)'
        if isinstance(filename_or_handle, util.basestring):
            opener = gzip.open if filename_or_handle.lower().endswith('.gz') \
                else open
            handle = opener(filename_or_handle, 'rb')
        else:
            handle = filename_or_handle
        model = pickle.load(handle)
        if isinstance(filename_or_handle, util.basestring):
            handle.close()
            util.log('loaded model from {}', filename_or_handle)
        return model

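    # Illustrative sketch (not part of the original source): a save/load
    # round trip through a gzipped pickle. The filename is hypothetical:
    #
    #     net.save('model.pkl.gz')            # gzipped because of '.gz'
    #     net = Network.load('model.pkl.gz')
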
    def loss(self, **kwargs):
        '''Return a variable representing the regularized loss for this
        network.

        The regularized loss includes both the :ref:`loss computation
        <losses>` for the network as well as any :ref:`regularizers
        <regularizers>` that are in place.

        Keyword arguments are passed directly to
        :func:`theanets.regularizers.from_kwargs`.

        Returns
        -------
        loss : Theano expression
            A Theano expression representing the loss of this network.
        '''
        regs = regularizers.from_kwargs(self, **kwargs)
        outputs, _ = self.build_graph(regs)
        return sum(l.weight * l(outputs) for l in self.losses) + \
            sum(r.weight * r.loss(self.layers, outputs) for r in regs)

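    # Illustrative sketch (not part of the original source): building a
    # regularized loss expression. The 'hidden_l1' keyword is assumed here
    # to name a regularizer understood by regularizers.from_kwargs:
    #
    #     expr = net.loss(hidden_l1=0.01)   # loss plus a sparsity penalty
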
    def monitors(self, **kwargs):
        '''Return expressions that should be computed to monitor training.

        Returns
        -------
        monitors : list of (name, expression) pairs
            A list of named monitor expressions to compute for this network.
        '''
        regs = regularizers.from_kwargs(self, **kwargs)
        outputs, _ = self.build_graph(regs)
        monitors = [('err', self.losses[0](outputs))]

        def matching(pattern):
            '''Yield all matching outputs or parameters from the graph.'''
            for name, expr in util.outputs_matching(outputs, pattern):
                yield name, expr
            for name, expr in util.params_matching(self.layers, pattern):
                yield name, expr

        def parse_levels(levels):
            '''Yield named monitor callables.'''
            if isinstance(levels, dict):
                levels = levels.items()
            if isinstance(levels, (int, float)):
                levels = [levels]
            for level in levels:
                if isinstance(level, (tuple, list)):
                    label, call = level
                    yield ':{}'.format(label), call
                if isinstance(level, (int, float)):
                    def call(expr):
                        return (expr < level).mean()
                    yield '<{}'.format(level), call

        inputs = kwargs.get('monitors', {})
        if isinstance(inputs, dict):
            inputs = inputs.items()
        for pattern, levels in inputs:
            for name, expr in matching(pattern):
                for key, value in parse_levels(levels):
                    monitors.append(('{}{}'.format(name, key), value(expr)))
        return monitors

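    # Illustrative sketch (not part of the original source): the 'monitors'
    # keyword maps output-name patterns to levels, per parse_levels above.
    # A numeric level monitors the fraction of values below that level, and
    # a (label, callable) pair computes a custom statistic. Wildcard
    # patterns are assumed to be supported by util.outputs_matching:
    #
    #     net.monitors(monitors={
    #         'hid1:out': (0.1, 0.9),       # -> 'hid1:out<0.1', 'hid1:out<0.9'
    #         'hid*:out': [('mean', lambda e: e.mean())],  # -> 'hid1:out:mean'
    #     })
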
    def updates(self, **kwargs):
        '''Return expressions to run as updates during network training.

        Returns
        -------
        updates : list of (parameter, expression) pairs
            A list of named parameter update expressions for this network.
        '''
        regs = regularizers.from_kwargs(self, **kwargs)
        _, updates = self.build_graph(regs)
        return updates