Source code for pygan.generativemodel.lstm_model

# -*- coding: utf-8 -*-
import numpy as np
from logging import getLogger

from pygan.generative_model import GenerativeModel
from pygan.true_sampler import TrueSampler

# LSTM Graph which is-a `Synapse`.
from pydbm.synapse.recurrenttemporalgraph.lstm_graph import LSTMGraph
# Loss function.
from pydbm.loss.interface.computable_loss import ComputableLoss
from pydbm.loss.mean_squared_error import MeanSquaredError
# SGD.
from pydbm.optimization.optparams.sgd import SGD
# Verification.
from pydbm.verification.verificate_function_approximation import VerificateFunctionApproximation

from pydbm.activation.interface.activating_function_interface import ActivatingFunctionInterface
# Logistic Function as activation function.
from pydbm.activation.logistic_function import LogisticFunction
# Tanh Function as activation function.
from pydbm.activation.tanh_function import TanhFunction

from pydbm.rnn.lstm_model import LSTMModel as LSTM


[docs]class LSTMModel(GenerativeModel):
    '''
    LSTM as a Generator.

    Originally, Long Short-Term Memory(LSTM) networks as a 
    special RNN structure has proven stable and powerful for 
    modeling long-range dependencies.

    The Key point of structural expansion is its memory cell 
    which essentially acts as an accumulator of the state information. 
    Every time observed data points are given as new information and input 
    to LSTM’s input gate, its information will be accumulated to the cell 
    if the input gate is activated. The past state of cell could be forgotten 
    in this process if LSTM’s forget gate is on. Whether the latest cell output 
    will be propagated to the final state is further controlled by the output gate.

    References:
        - Cho, K., Van Merriënboer, B., Gulcehre, C., Bahdanau, D., Bougares, F., Schwenk, H., & Bengio, Y. (2014). Learning phrase representations using RNN encoder-decoder for statistical machine translation. arXiv preprint arXiv:1406.1078.
        - Malhotra, P., Ramakrishnan, A., Anand, G., Vig, L., Agarwal, P., & Shroff, G. (2016). LSTM-based encoder-decoder for multi-sensor anomaly detection. arXiv preprint arXiv:1607.00148.
        - Zaremba, W., Sutskever, I., & Vinyals, O. (2014). Recurrent neural network regularization. arXiv preprint arXiv:1409.2329.

    '''

    def __init__(
        self,
        lstm_model=None,
        computable_loss=None,
        batch_size=20,
        input_neuron_count=100,
        hidden_neuron_count=300,
        observed_activating_function=None,
        input_gate_activating_function=None,
        forget_gate_activating_function=None,
        output_gate_activating_function=None,
        hidden_activating_function=None,
        output_activating_function=None,
        seq_len=10,
        join_io_flag=False,
        learning_rate=1e-05,
        learning_attenuate_rate=0.1,
        attenuate_epoch=50
    ):
        '''
        Init.

        Args:
            lstm_model:                         is-a `lstm_model`.
            computable_loss:                    is-a `ComputableLoss`.

            batch_size:                         Batch size.
                                                This parameters will be refered only when `lstm_model` is `None`.

            input_neuron_count:                 The number of input units.
                                                This parameters will be refered only when `lstm_model` is `None`.

            hidden_neuron_count:                The number of hidden units.
                                                This parameters will be refered only when `lstm_model` is `None`.

            observed_activating_function:       is-a `ActivatingFunctionInterface` in hidden layer.
                                                This parameters will be refered only when `lstm_model` is `None`.
                                                If `None`, this value will be `TanhFunction`.

            input_gate_activating_function:     is-a `ActivatingFunctionInterface` in hidden layer.
                                                This parameters will be refered only when `lstm_model` is `None`.
                                                If `None`, this value will be `LogisticFunction`.

            forget_gate_activating_function:    is-a `ActivatingFunctionInterface` in hidden layer.
                                                This parameters will be refered only when `lstm_model` is `None`.
                                                If `None`, this value will be `LogisticFunction`.

            output_gate_activating_function:    is-a `ActivatingFunctionInterface` in hidden layer.
                                                This parameters will be refered only when `lstm_model` is `None`.
                                                If `None`, this value will be `LogisticFunction`.

            hidden_activating_function:         is-a `ActivatingFunctionInterface` in hidden layer.
                                                This parameters will be refered only when `lstm_model` is `None`.

            output_activating_function:         is-a `ActivatingFunctionInterface` in output layer.
                                                This parameters will be refered only when `lstm_model` is `None`.
                                                If `None`, this model outputs from LSTM's hidden layer in inferencing.

            seq_len:                            The length of sequences.
                                                This means refereed maxinum step `t` in feedforward.

            join_io_flag:                       If this value and value of `output_activating_function` is not `None`,
                                                This model outputs tensors combining observed data points and inferenced data
                                                in a series direction.

            learning_rate:                      Learning rate.
            learning_attenuate_rate:            Attenuate the `learning_rate` by a factor of this value every `attenuate_epoch`.
            attenuate_epoch:                    Attenuate the `learning_rate` by a factor of `learning_attenuate_rate` every `attenuate_epoch`.
                                                Additionally, in relation to regularization,
                                                this class constrains weight matrixes every `attenuate_epoch`.

        '''
        if computable_loss is None:
            computable_loss = MeanSquaredError()

        if lstm_model is not None:
            if isinstance(lstm_model, LSTM) is False:
                raise TypeError()
        else:
            # Init.
            graph = LSTMGraph()

            # Activation function in LSTM.
            if observed_activating_function is None:
                graph.observed_activating_function = TanhFunction()
            else:
                if isinstance(observed_activating_function, ActivatingFunctionInterface) is False:
                    raise TypeError()
                graph.observed_activating_function = observed_activating_function
            
            if input_gate_activating_function is None:
                graph.input_gate_activating_function = LogisticFunction()
            else:
                if isinstance(input_gate_activating_function, ActivatingFunctionInterface) is False:
                    raise TypeError()
                graph.input_gate_activating_function = input_gate_activating_function
            
            if forget_gate_activating_function is None:
                graph.forget_gate_activating_function = LogisticFunction()
            else:
                if isinstance(forget_gate_activating_function, ActivatingFunctionInterface) is False:
                    raise TypeError()
                graph.forget_gate_activating_function = forget_gate_activating_function
            
            if output_gate_activating_function is None:
                graph.output_gate_activating_function = LogisticFunction()
            else:
                if isinstance(output_gate_activating_function, ActivatingFunctionInterface) is False:
                    raise TypeError()
                graph.output_gate_activating_function = output_gate_activating_function

            if hidden_activating_function is None:
                graph.hidden_activating_function = TanhFunction()
            else:
                if isinstance(hidden_activating_function, ActivatingFunctionInterface) is False:
                    raise TypeError()
                graph.hidden_activating_function = hidden_activating_function

            if output_activating_function is None:
                graph.output_activating_function = TanhFunction()
                self.__output_flag = False
                output_neuron_count = 1
            else:
                graph.output_activating_function = output_activating_function
                self.__output_flag = True
                output_neuron_count = hidden_neuron_count

            # Initialization strategy.
            # This method initialize each weight matrices and biases in Gaussian distribution: `np.random.normal(size=hoge) * 0.01`.
            graph.create_rnn_cells(
                input_neuron_count=input_neuron_count,
                hidden_neuron_count=hidden_neuron_count,
                output_neuron_count=output_neuron_count
            )

            opt_params = SGD()
            opt_params.weight_limit = 1e+10
            opt_params.dropout_rate = 0.0

            lstm_model = LSTM(
                # Delegate `graph` to `LSTMModel`.
                graph=graph,
                # The number of epochs in mini-batch training.
                epochs=100,
                # The batch size.
                batch_size=batch_size,
                # Learning rate.
                learning_rate=learning_rate,
                # Attenuate the `learning_rate` by a factor of this value every `attenuate_epoch`.
                learning_attenuate_rate=learning_attenuate_rate,
                # Attenuate the `learning_rate` by a factor of `learning_attenuate_rate` every `attenuate_epoch`.
                attenuate_epoch=attenuate_epoch,
                # The length of sequences.
                seq_len=seq_len,
                # Refereed maxinum step `t` in BPTT. If `0`, this class referes all past data in BPTT.
                bptt_tau=seq_len,
                # Size of Test data set. If this value is `0`, the validation will not be executed.
                test_size_rate=0.3,
                # Loss function.
                computable_loss=computable_loss,
                # Optimizer.
                opt_params=opt_params,
                # Verification function.
                verificatable_result=VerificateFunctionApproximation(),
                tol=0.0
            )

        self.__lstm_model = lstm_model
        self.__seq_len = seq_len
        self.__learning_rate = learning_rate
        self.__join_io_flag = join_io_flag
        self.__computable_loss = computable_loss
        self.__loss_list = []
        self.__epoch_counter = 0
        self.__learning_attenuate_rate = learning_attenuate_rate
        self.__attenuate_epoch = attenuate_epoch

        logger = getLogger("pygan")
        self.__logger = logger

[docs]    def draw(self):
        '''
        Draws samples from the `fake` distribution.

        Returns:
            `np.ndarray` of samples.
        '''
        observed_arr = self.noise_sampler.generate()
        arr = self.inference(observed_arr)
        return arr

[docs]    def inference(self, observed_arr):
        '''
        Draws samples from the `fake` distribution.

        Args:
            observed_arr:     `np.ndarray` of observed data points.
        
        Returns:
            `np.ndarray` of inferenced data.
        '''
        inferenced_arr = self.__lstm_model.inference(observed_arr)
        if self.__output_flag is True:
            self.__inferenced_arr = inferenced_arr
            if self.__join_io_flag is False:
                return inferenced_arr
            else:
                return np.concatenate([observed_arr, inferenced_arr], axis=1)
        else:
            return self.__lstm_model.get_feature_points()

[docs]    def learn(self, grad_arr):
        '''
        Update this Discriminator by ascending its stochastic gradient.

        Args:
            grad_arr:   `np.ndarray` of gradients.

        Returns:
            `np.ndarray` of delta or gradients.

        '''
        if ((self.__epoch_counter + 1) % self.__attenuate_epoch == 0):
            self.__learning_rate = self.__learning_rate * self.__learning_attenuate_rate

        if self.__output_flag is True:
            if self.__join_io_flag is False:
                delta_arr, grads_list = self.__lstm_model.back_propagation(self.__inferenced_arr, grad_arr)
            else:
                grad_arr = grad_arr[:, self.__seq_len:]
                delta_arr, grads_list = self.__lstm_model.back_propagation(self.__inferenced_arr, grad_arr)
        else:
            if grad_arr.ndim > 3:
                grad_arr = grad_arr.reshape((
                    grad_arr.shape[0],
                    grad_arr.shape[1],
                    -1
                ))
                grad_arr = grad_arr[:, -1]
            elif grad_arr.ndim == 3:
                grad_arr = grad_arr[:, -1]

            delta_arr, _, grads_list = self.__lstm_model.hidden_back_propagate(grad_arr)
            grads_list.insert(0, None)
            grads_list.insert(0, None)

        self.__lstm_model.optimize(
            grads_list,
            self.__learning_rate,
            self.__epoch_counter
        )
        self.__epoch_counter += 1

        return delta_arr

[docs]    def switch_inferencing_mode(self, inferencing_mode=True):
        '''
        Set inferencing mode in relation to concrete regularizations.

        Args:
            inferencing_mode:       Inferencing mode or not.
        '''
        self.__lstm_model.opt_params.inferencing_mode = inferencing_mode

[docs]    def get_lstm_model(self):
        ''' getter '''
        return self.__lstm_model
    
[docs]    def set_lstm_model(self, value):
        ''' setter '''
        raise TypeError("This property must be read-only.")
    
    lstm_model = property(get_lstm_model, set_lstm_model)