Source code for pyqlearning.functionapproximator.convolutional_lstm_fa

# -*- coding: utf-8 -*-
import numpy as np
from logging import getLogger, StreamHandler, NullHandler, DEBUG, ERROR

from pyqlearning.function_approximator import FunctionApproximator

from pydbm.cnn.convolutional_neural_network import ConvolutionalNeuralNetwork
from pydbm.cnn.layerablecnn.convolution_layer import ConvolutionLayer
from pydbm.cnn.layerable_cnn import LayerableCNN
from pydbm.cnn.feature_generator import FeatureGenerator
from pydbm.optimization.opt_params import OptParams
from pydbm.verification.interface.verificatable_result import VerificatableResult
from pydbm.loss.interface.computable_loss import ComputableLoss

from pydbm.rnn.lstmmodel.conv_lstm_model import ConvLSTMModel

# Loss function.
from pydbm.loss.mean_squared_error import MeanSquaredError
# Adam as an optimizer.
from pydbm.optimization.optparams.adam import Adam
# Verification.
from pydbm.verification.verificate_function_approximation import VerificateFunctionApproximation


class ConvolutionalLSTMFA(FunctionApproximator):
    '''
    Convolutional LSTM Networks as a Function Approximator.

    This model, which structurally couples convolution operators to LSTM networks,
    can be utilized as a component in constructing the Function Approximator.

    References:
        - Sainath, T. N., Vinyals, O., Senior, A., & Sak, H. (2015, April). Convolutional, long short-term memory, fully connected deep neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on (pp. 4580-4584). IEEE.
        - Xingjian, S. H. I., Chen, Z., Wang, H., Yeung, D. Y., Wong, W. K., & Woo, W. C. (2015). Convolutional LSTM network: A machine learning approach for precipitation nowcasting. In Advances in neural information processing systems (pp. 802-810).
    '''

    __next_action_arr_list = []

    def __init__(
        self,
        batch_size,
        conv_lstm_model,
        seq_len=10,
        learning_rate=1e-05,
        computable_loss=None,
        opt_params=None,
        verificatable_result=None,
        verbose_mode=False
    ):
        '''
        Init.

        Args:
            batch_size:             Batch size in mini-batch.
            conv_lstm_model:        is-a `ConvLSTMModel`.
            seq_len:                The length of sequences.
            learning_rate:          Learning rate.
            computable_loss:        is-a `ComputableLoss`.
            opt_params:             is-a `OptParams`.
            verificatable_result:   is-a `VerificateFunctionApproximation`.
            verbose_mode:           Verbose mode or not.
        '''
        logger = getLogger("pydbm")
        handler = StreamHandler()
        if verbose_mode is True:
            handler.setLevel(DEBUG)
            logger.setLevel(DEBUG)
        else:
            handler.setLevel(ERROR)
            logger.setLevel(ERROR)
        logger.addHandler(handler)

        self.__logger = getLogger("pyqlearning")
        handler = StreamHandler()
        if verbose_mode is True:
            self.__logger.setLevel(DEBUG)
        else:
            self.__logger.setLevel(ERROR)
        self.__logger.addHandler(handler)

        if isinstance(conv_lstm_model, ConvLSTMModel) is False:
            raise TypeError()

        if computable_loss is None:
            computable_loss = MeanSquaredError()
        if verificatable_result is None:
            verificatable_result = VerificateFunctionApproximation()
        if opt_params is None:
            opt_params = Adam()
            opt_params.weight_limit = 0.5
            opt_params.dropout_rate = 0.0

        self.__conv_lstm_model = conv_lstm_model
        self.__seq_len = seq_len
        self.__batch_size = batch_size
        self.__computable_loss = computable_loss
        self.__learning_rate = learning_rate
        self.__verbose_mode = verbose_mode
        self.__loss_list = []
    def learn_q(self, predicted_q_arr, real_q_arr):
        '''
        Learn Q-Value.

        Args:
            predicted_q_arr:    `np.ndarray` of predicted Q-Values.
            real_q_arr:         `np.ndarray` of real Q-Values.
        '''
        loss = self.__computable_loss.compute_loss(predicted_q_arr, real_q_arr)
        delta_arr = self.__computable_loss.compute_delta(predicted_q_arr, real_q_arr)
        delta_arr, grads_list = self.__conv_lstm_model.back_propagation(predicted_q_arr, delta_arr)
        self.__conv_lstm_model.optimize(grads_list, self.__learning_rate, 1)
        self.__loss_list.append(loss)
    def inference_q(self, next_action_arr):
        '''
        Inference Q-Value.

        Args:
            next_action_arr:    `np.ndarray` of action.

        Returns:
            `np.ndarray` of Q-Values.
        '''
        self.__next_action_arr_list.append(next_action_arr)
        while len(self.__next_action_arr_list) > self.__seq_len:
            self.__next_action_arr_list = self.__next_action_arr_list[1:]
        while len(self.__next_action_arr_list) < self.__seq_len:
            self.__next_action_arr_list.append(self.__next_action_arr_list[-1])

        _next_action_arr = np.array(self.__next_action_arr_list)
        _next_action_arr = _next_action_arr.transpose((1, 0, 2, 3, 4))
        q_arr = self.__conv_lstm_model.inference(_next_action_arr)
        return q_arr[:, -1].reshape((q_arr.shape[0], 1))
    def get_model(self):
        '''
        `object` of model as a function approximator,
        which has `conv_lstm_model` whose type is
        `pydbm.rnn.lstmmodel.conv_lstm_model.ConvLSTMModel`.
        '''
        class Model(object):
            def __init__(self, conv_lstm_model):
                self.conv_lstm_model = conv_lstm_model

        return Model(self.__conv_lstm_model)
    def set_model(self, value):
        '''
        Model as a function approximator.
        '''
        raise TypeError("This property must be read-only.")
    model = property(get_model, set_model)
    def get_loss_list(self):
        ''' getter '''
        return self.__loss_list
    def set_loss_list(self, value):
        ''' setter '''
        self.__loss_list = value
    loss_list = property(get_loss_list, set_loss_list)
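
A minimal usage sketch follows. It assumes a `ConvLSTMModel` has already been built with pydbm, and the shapes below (20 samples, 1 channel, 10x10 grids) are purely illustrative; only the `ConvolutionalLSTMFA` calls reflect the class defined above.

import numpy as np
from pyqlearning.functionapproximator.convolutional_lstm_fa import ConvolutionalLSTMFA

# `conv_lstm_model` is assumed to be a pre-built
# `pydbm.rnn.lstmmodel.conv_lstm_model.ConvLSTMModel` whose layers accept
# rank-5 input of shape (batch, seq_len, channel, height, width).
function_approximator = ConvolutionalLSTMFA(
    batch_size=20,
    conv_lstm_model=conv_lstm_model,
    seq_len=10,
    learning_rate=1e-05
)

# One inference step: a rank-4 observation (batch, channel, height, width)
# is buffered internally until `seq_len` frames are available, then fed to the model.
next_action_arr = np.random.normal(size=(20, 1, 10, 10))
predicted_q_arr = function_approximator.inference_q(next_action_arr)

# One learning step against Q-Values observed from the environment
# (simulated here with noise for illustration only).
real_q_arr = predicted_q_arr + np.random.normal(size=predicted_q_arr.shape) * 0.01
function_approximator.learn_q(predicted_q_arr, real_q_arr)
print(function_approximator.loss_list[-1])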