Source code for pyqlearning.functionapproximator.convolutional_lstm_fa

# -*- coding: utf-8 -*-
import numpy as np
from logging import getLogger, StreamHandler, NullHandler, DEBUG, ERROR

from pyqlearning.function_approximator import FunctionApproximator

from pydbm.cnn.convolutional_neural_network import ConvolutionalNeuralNetwork
from pydbm.cnn.layerablecnn.convolution_layer import ConvolutionLayer
from pydbm.cnn.layerable_cnn import LayerableCNN
from pydbm.cnn.feature_generator import FeatureGenerator
from pydbm.optimization.opt_params import OptParams
from pydbm.verification.interface.verificatable_result import VerificatableResult
from pydbm.loss.interface.computable_loss import ComputableLoss

from pydbm.rnn.lstmmodel.conv_lstm_model import ConvLSTMModel

# Loss function.
from pydbm.loss.mean_squared_error import MeanSquaredError
# Adam as a optimizer.
from pydbm.optimization.optparams.adam import Adam
# Verification.
from pydbm.verification.verificate_function_approximation import VerificateFunctionApproximation


[docs]class ConvolutionalLSTMFA(FunctionApproximator):
    '''
    Convolutional LSTM Networks as a Function Approximator,
    which is a model that structurally couples convolution operators to LSTM networks, 
    can be utilized as components in constructing the Function Approximator.
    
    References:
        - Sainath, T. N., Vinyals, O., Senior, A., & Sak, H. (2015, April). Convolutional, long short-term memory, fully connected deep neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on (pp. 4580-4584). IEEE.
        - Xingjian, S. H. I., Chen, Z., Wang, H., Yeung, D. Y., Wong, W. K., & Woo, W. C. (2015). Convolutional LSTM network: A machine learning approach for precipitation nowcasting. In Advances in neural information processing systems (pp. 802-810).
    '''
    
    __next_action_arr_list = []
    
    def __init__(
        self,
        batch_size,
        conv_lstm_model,
        seq_len=10,
        learning_rate=1e-05,
        computable_loss=None,
        opt_params=None,
        verificatable_result=None,
        verbose_mode=False
    ):
        '''
        Init.

        Args:
            batch_size:                     Batch size in mini-batch.
            conv_lstm_model:                is-a `ConvLSTMModel`.
            seq_len:                        The length of sequences.
            learning_rate:                  Learning rate.
            computable_loss:                is-a `ComputableLoss`.
            opt_params:                     is-a `OptParams`.
            verificatable_result:           is-a `VerificateFunctionApproximation`.
            verbose_mode:                   Verbose mode or not.

        '''
        logger = getLogger("pydbm")
        handler = StreamHandler()
        if verbose_mode is True:
            handler.setLevel(DEBUG)
            logger.setLevel(DEBUG)
        else:
            handler.setLevel(ERROR)
            logger.setLevel(ERROR)

        logger.addHandler(handler)

        self.__logger = getLogger("pyqlearning")
        handler = StreamHandler()
        if verbose_mode is True:
            self.__logger.setLevel(DEBUG)
        else:
            self.__logger.setLevel(ERROR)
            
        self.__logger.addHandler(handler)

        if isinstance(conv_lstm_model, ConvLSTMModel) is False:
            raise TypeError()

        if computable_loss is None:
            computable_loss = MeanSquaredError()
        if verificatable_result is None:
            verificatable_result = VerificateFunctionApproximation()
        if opt_params is None:
            opt_params = Adam()
            opt_params.weight_limit = 0.5
            opt_params.dropout_rate = 0.0

        self.__conv_lstm_model = conv_lstm_model
        self.__seq_len = seq_len
        self.__batch_size = batch_size
        self.__computable_loss = computable_loss
        self.__learning_rate = learning_rate
        self.__verbose_mode = verbose_mode
        self.__loss_list = []

[docs]    def learn_q(self, predicted_q_arr, real_q_arr):
        '''
        Infernce Q-Value.
        
        Args:
            predicted_q_arr:    `np.ndarray` of predicted Q-Values.
            real_q_arr:         `np.ndarray` of real Q-Values.
        '''
        loss = self.__computable_loss.compute_loss(predicted_q_arr, real_q_arr)
        delta_arr = self.__computable_loss.compute_delta(predicted_q_arr, real_q_arr)
        delta_arr, grads_list = self.__conv_lstm_model.back_propagation(predicted_q_arr, delta_arr)
        self.__conv_lstm_model.optimize(grads_list, self.__learning_rate, 1)
        self.__loss_list.append(loss)

[docs]    def inference_q(self, next_action_arr):
        '''
        Infernce Q-Value.
        
        Args:
            next_action_arr:     `np.ndarray` of action.
        
        Returns:
            `np.ndarray` of Q-Values.
        '''
        self.__next_action_arr_list.append(next_action_arr)
        while len(self.__next_action_arr_list) > self.__seq_len:
            self.__next_action_arr_list = self.__next_action_arr_list[1:]
        while len(self.__next_action_arr_list) < self.__seq_len:
            self.__next_action_arr_list.append(self.__next_action_arr_list[-1])

        _next_action_arr = np.array(self.__next_action_arr_list)
        _next_action_arr = _next_action_arr.transpose((1, 0, 2, 3, 4))

        q_arr = self.__conv_lstm_model.inference(_next_action_arr)
        return q_arr[:, -1].reshape((q_arr.shape[0], 1))

[docs]    def get_model(self):
        '''
        `object` of model as a function approximator,
        which has `conv_lstm_model` whose type is 
        `pydbm.rnn.lstmmodel.conv_lstm_model.ConvLSTMModel`.
        '''
        class Model(object):
            def __init__(self, conv_lstm_model):
                self.conv_lstm_model = conv_lstm_model

        return Model(self.__conv_lstm_model)

[docs]    def set_model(self, value):
        '''
        Model as a function approximator.
        '''
        raise TypeError("This property must be read-only.")

    model = property(get_model, set_model)

[docs]    def get_loss_list(self):
        ''' getter '''
        return self.__loss_list

[docs]    def set_loss_list(self, value):
        ''' setter '''
        self.__loss_list = value
    
    loss_list = property(get_loss_list, set_loss_list)