Source code for pyqlearning.functionapproximator.convolutional_lstm_fc_fa

# -*- coding: utf-8 -*-
import numpy as np
from logging import getLogger, StreamHandler, NullHandler, DEBUG, ERROR

from pyqlearning.function_approximator import FunctionApproximator

from pydbm.cnn.convolutional_neural_network import ConvolutionalNeuralNetwork
from pydbm.cnn.layerablecnn.convolution_layer import ConvolutionLayer
from pydbm.cnn.layerable_cnn import LayerableCNN
from pydbm.cnn.feature_generator import FeatureGenerator
from pydbm.optimization.opt_params import OptParams
from pydbm.verification.interface.verificatable_result import VerificatableResult
from pydbm.loss.interface.computable_loss import ComputableLoss

from pydbm.rnn.lstm_model import LSTMModel

# Loss function.
from pydbm.loss.mean_squared_error import MeanSquaredError
# Adam as an optimizer.
from pydbm.optimization.optparams.adam import Adam
# Verification.
from pydbm.verification.verificate_function_approximation import VerificateFunctionApproximation


class ConvolutionalLSTMFCFA(FunctionApproximator):
    '''
    Convolutional LSTM Networks as a Function Approximator
    like CLDNN Architecture(Sainath, T. N, et al., 2015).

    This is a model of the function approximator which loosely couples
    CNN and LSTM. Like CLDNN Architecture(Sainath, T. N, et al., 2015),
    this model uses CNNs to reduce the spectral variation of the input
    feature of rewards, and then passes this to LSTM layers to perform
    temporal modeling, and finally outputs this to DNN layers, which
    produces a feature representation of Q-Values that is more easily
    separable.

    References:
        - https://code.accel-brain.com/Deep-Learning-by-means-of-Design-Pattern/pydbm.cnn.html
        - Sainath, T. N., Vinyals, O., Senior, A., & Sak, H. (2015, April). Convolutional, long short-term memory, fully connected deep neural networks. In Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on (pp. 4580-4584). IEEE.
    '''

    # Stream handlers attached by this class, keyed by logger name.
    # Loggers are process-wide, so this is intentionally shared between
    # instances: it lets every construction reuse one handler per logger
    # instead of stacking a new one (which would duplicate each log line).
    __handler_dict = {}

    def __init__(
        self,
        batch_size,
        layerable_cnn_list,
        lstm_model,
        seq_len=10,
        learning_rate=1e-05,
        computable_loss=None,
        opt_params=None,
        verificatable_result=None,
        pre_learned_path_list=None,
        cnn=None,
        verbose_mode=False
    ):
        '''
        Init.

        Args:
            batch_size:                 Batch size in mini-batch.
            layerable_cnn_list:         `list` of `LayerableCNN`.
            lstm_model:                 is-a `LSTMModel`.
            seq_len:                    The length of sequences.
            learning_rate:              Learning rate.
            computable_loss:            is-a `ComputableLoss`.
                                        If `None`, `MeanSquaredError` is used.
            opt_params:                 is-a `OptParams`.
                                        If `None`, `Adam` is used.
            verificatable_result:       is-a `VerificateFunctionApproximation`.
                                        If `None`, a default instance is created.
            pre_learned_path_list:      `list` of file path that stored pre-learned parameters.
                                        This parameter will be referred to only when `cnn` is `None`.

            cnn:                        is-a `ConvolutionalNeuralNetwork` as a model in this class.
                                        If not `None`, it is used as the CNN of this class as-is.
                                        If `None`, this class initializes a
                                        `ConvolutionalNeuralNetwork` with default hyper parameters.

            verbose_mode:               Verbose mode or not.

        Raises:
            TypeError:                  If `lstm_model` is not a `LSTMModel`.
        '''
        # The "pydbm" handler follows the verbosity level; the handler of
        # "pyqlearning" keeps its default level and only the logger filters.
        self.__setup_logger("pydbm", verbose_mode, True)
        self.__logger = self.__setup_logger("pyqlearning", verbose_mode, False)

        if isinstance(lstm_model, LSTMModel) is False:
            raise TypeError("The type of `lstm_model` must be `LSTMModel`.")

        if computable_loss is None:
            computable_loss = MeanSquaredError()
        if verificatable_result is None:
            verificatable_result = VerificateFunctionApproximation()
        if opt_params is None:
            # These settings are applied only to the optimizer created here.
            opt_params = Adam()
            opt_params.weight_limit = 0.5
            opt_params.dropout_rate = 0.0

        if cnn is None:
            cnn = ConvolutionalNeuralNetwork(
                # The `list` of `ConvolutionLayer`.
                layerable_cnn_list=layerable_cnn_list,
                # The number of epochs in mini-batch training.
                epochs=200,
                # The batch size.
                batch_size=batch_size,
                # Learning rate.
                learning_rate=learning_rate,
                # Loss function.
                computable_loss=computable_loss,
                # Optimizer.
                opt_params=opt_params,
                # Verification.
                verificatable_result=verificatable_result,
                # Pre-learned parameters.
                pre_learned_path_list=pre_learned_path_list,
                # Others.
                learning_attenuate_rate=0.1,
                attenuate_epoch=50
            )

        self.__cnn = cnn
        self.__lstm_model = lstm_model
        self.__seq_len = seq_len
        self.__batch_size = batch_size
        self.__computable_loss = computable_loss
        self.__learning_rate = learning_rate
        self.__verbose_mode = verbose_mode
        # Shape of the latest CNN output; set by `inference_q`.
        self.__q_shape = None
        self.__loss_list = []
        # History of flattened CNN outputs fed to the LSTM. It is created
        # per instance so that separate approximators never share history.
        self.__q_arr_list = []

    @classmethod
    def __setup_logger(cls, name, verbose_mode, set_handler_level):
        '''
        Set the level of a named logger and attach one stream handler to it.

        Args:
            name:                   Name of the logger.
            verbose_mode:           If `True`, the level is `DEBUG`. Otherwise `ERROR`.
            set_handler_level:      If `True`, the level is also set on the handler.

        Returns:
            The configured logger.
        '''
        level = DEBUG if verbose_mode is True else ERROR
        logger = getLogger(name)
        logger.setLevel(level)

        handler = cls.__handler_dict.get(name)
        if handler is None:
            handler = StreamHandler()
            cls.__handler_dict[name] = handler
        # `addHandler` ignores a handler that is already attached.
        logger.addHandler(handler)
        if set_handler_level:
            handler.setLevel(level)
        return logger

    def learn_q(self, predicted_q_arr, real_q_arr):
        '''
        Learn Q-Value.

        Computes the loss and its delta, back propagates the delta through
        the LSTM and then the CNN, updates both models, and records the loss
        in `loss_list`.

        Args:
            predicted_q_arr:    `np.ndarray` of predicted Q-Values.
            real_q_arr:         `np.ndarray` of real Q-Values.

        Raises:
            ValueError:         If `inference_q` has not been executed yet.
        '''
        if self.__q_shape is None:
            raise ValueError("Before learning, You should execute `inference_q`.")

        loss = self.__computable_loss.compute_loss(predicted_q_arr, real_q_arr)
        delta_arr = self.__computable_loss.compute_delta(predicted_q_arr, real_q_arr)

        delta_arr, lstm_output_grads_list = self.__lstm_model.output_back_propagate(
            predicted_q_arr,
            delta_arr
        )
        delta_arr, lstm_hidden_grads_list = self.__lstm_model.hidden_back_propagate(
            delta_arr
        )
        # Output-layer gradients first, hidden-layer gradients after.
        lstm_grads_list = lstm_output_grads_list
        lstm_grads_list.extend(lstm_hidden_grads_list)

        # Only the last step along axis 1 is handed back to the CNN, in the
        # shape of the CNN output recorded by `inference_q`.
        delta_arr = delta_arr[:, -1].reshape(self.__q_shape)
        delta_arr = self.__cnn.back_propagation(delta_arr)

        # NOTE(review): the trailing `1` is presumably the epoch counter
        # expected by `optimize` -- confirm against pydbm.
        self.__lstm_model.optimize(lstm_grads_list, self.__learning_rate, 1)
        self.__cnn.optimize(self.__learning_rate, 1)
        self.__loss_list.append(loss)

    def inference_q(self, next_action_arr):
        '''
        Inference Q-Value.

        The CNN output is flattened per sample and appended to a history
        holding at most `seq_len` entries. While the history is shorter than
        `seq_len`, it is padded by repeating the newest entry. The history is
        then given to the LSTM as one sequence.

        Args:
            next_action_arr:     `np.ndarray` of action.

        Returns:
            `np.ndarray` of Q-Values, reshaped to `(number of samples, 1)`.
        '''
        q_arr = self.__cnn.inference(next_action_arr)
        # Remembered so that `learn_q` can restore the CNN output shape.
        self.__q_shape = q_arr.shape
        q_arr = q_arr.reshape((q_arr.shape[0], -1))

        self.__q_arr_list.append(q_arr)
        # Drop the oldest entries beyond the window.
        excess = len(self.__q_arr_list) - self.__seq_len
        if excess > 0:
            del self.__q_arr_list[:excess]
        # Pad a short history by repeating the newest entry.
        shortage = self.__seq_len - len(self.__q_arr_list)
        if shortage > 0:
            self.__q_arr_list.extend([self.__q_arr_list[-1]] * shortage)

        # (sequence, sample, feature) -> (sample, sequence, feature)
        q_arr = np.array(self.__q_arr_list)
        q_arr = q_arr.transpose((1, 0, 2))
        q_arr = self.__lstm_model.inference(q_arr)
        return q_arr[:, -1].reshape((q_arr.shape[0], 1))

    def get_model(self):
        '''
        `object` of model as a function approximator,
        which has `cnn` whose type is
        `pydbm.cnn.convolutional_neural_network.ConvolutionalNeuralNetwork`
        and `lstm_model` whose type is `pydbm.rnn.lstm_model.LSTMModel`.
        '''
        class Model(object):
            def __init__(self, cnn, lstm_model):
                self.cnn = cnn
                self.lstm_model = lstm_model

        return Model(self.__cnn, self.__lstm_model)

    def set_model(self, value):
        '''
        Model as a function approximator. Always raises `TypeError`
        because this property is read-only.
        '''
        raise TypeError("This property must be read-only.")

    model = property(get_model, set_model)

    def get_loss_list(self):
        ''' getter: `list` of losses recorded by `learn_q`. '''
        return self.__loss_list

    def set_loss_list(self, value):
        ''' setter: replace the `list` of recorded losses. '''
        self.__loss_list = value

    loss_list = property(get_loss_list, set_loss_list)