Source code for pyqlearning.functionapproximator.cnn_fa

# -*- coding: utf-8 -*-
import numpy as np
from logging import getLogger, StreamHandler, NullHandler, DEBUG, ERROR

from pyqlearning.function_approximator import FunctionApproximator

from pydbm.cnn.convolutional_neural_network import ConvolutionalNeuralNetwork
from pydbm.cnn.layerablecnn.convolution_layer import ConvolutionLayer
from pydbm.cnn.layerable_cnn import LayerableCNN
from pydbm.cnn.feature_generator import FeatureGenerator
from pydbm.optimization.opt_params import OptParams
from pydbm.verification.interface.verificatable_result import VerificatableResult
from pydbm.loss.interface.computable_loss import ComputableLoss
from pydbm.synapse.cnn_output_graph import CNNOutputGraph

# Loss function.
from pydbm.loss.mean_squared_error import MeanSquaredError
# Adam as an optimizer.
from pydbm.optimization.optparams.adam import Adam
# Verification.
from pydbm.verification.verificate_function_approximation import VerificateFunctionApproximation

class CNNFA(FunctionApproximator):
    '''
    Convolutional Neural Networks(CNNs) as a Function Approximator.

    CNNs are hierarchical models whose convolutional layers alternate with
    subsampling layers, reminiscent of simple and complex cells in the
    primary visual cortex.

    This class demonstrates that CNNs can solve generalisation problems to
    learn successful control policies from observed data points in complex
    Reinforcement Learning environments. The network is trained with a
    variant of the Q-learning algorithm, with stochastic gradient descent
    to update the weights.

    The Deconvolution also called transposed convolutions
    “work by swapping the forward and backward passes of a convolution.”
    (Dumoulin, V., & Visin, F. 2016, p20.)

    References:
        - Dumoulin, V., & Visin, F. (2016). A guide to convolution arithmetic for deep learning. arXiv preprint arXiv:1603.07285.
        - Masci, J., Meier, U., Cireşan, D., & Schmidhuber, J. (2011, June). Stacked convolutional auto-encoders for hierarchical feature extraction. In International Conference on Artificial Neural Networks (pp. 52-59). Springer, Berlin, Heidelberg.
        - Mnih, V., Kavukcuoglu, K., Silver, D., Graves, A., Antonoglou, I., Wierstra, D., & Riedmiller, M. (2013). Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602.
    '''

    def __init__(
        self,
        batch_size,
        layerable_cnn_list,
        cnn_output_graph,
        learning_rate=1e-05,
        learning_attenuate_rate=0.1,
        attenuate_epoch=50,
        computable_loss=None,
        opt_params=None,
        verificatable_result=None,
        pre_learned_path_list=None,
        pre_learned_output_path=None,
        cnn=None,
        verbose_mode=False
    ):
        '''
        Init.

        Args:
            batch_size:                     Batch size in mini-batch.
            layerable_cnn_list:             `list` of `LayerableCNN`.
            cnn_output_graph:               Computation graph which is-a `CNNOutputGraph` to compute parameters in output layer.
            learning_rate:                  Learning rate.
            learning_attenuate_rate:        Attenuate the `learning_rate` by a factor of this value every `attenuate_epoch`.
            attenuate_epoch:                Attenuate the `learning_rate` by a factor of `learning_attenuate_rate` every `attenuate_epoch`.
                                            In this class one epoch corresponds to one call of `learn_q`.
            computable_loss:                is-a `ComputableLoss`. If `None`, `MeanSquaredError` is used.
            opt_params:                     is-a `OptParams`. If `None`, `Adam` is used.
            verificatable_result:           is-a `VerificateFunctionApproximation`.
                                            If `None`, a default instance is created.
            pre_learned_path_list:          `list` of file path that stored pre-learned parameters.
                                            This parameters will be referred only when `cnn` is `None`.
            pre_learned_output_path:        File path that stores pre-learned parameters.
            cnn:                            is-a `ConvolutionalNeuralNetwork` as a model in this class.
                                            If not `None`, `self.__cnn` will be overridden by this `cnn`.
                                            If `None`, this class initialize `ConvolutionalNeuralNetwork`
                                            by default hyper parameters.
            verbose_mode:                   Verbose mode or not.
        '''
        # NOTE: a fresh `StreamHandler` is attached to the shared "pydbm" and
        # "pyqlearning" loggers on every construction, so creating several
        # instances in one process attaches several handlers to each logger.
        logger = getLogger("pydbm")
        handler = StreamHandler()
        if verbose_mode is True:
            handler.setLevel(DEBUG)
            logger.setLevel(DEBUG)
        else:
            handler.setLevel(ERROR)
            logger.setLevel(ERROR)
        logger.addHandler(handler)

        self.__logger = getLogger("pyqlearning")
        handler = StreamHandler()
        if verbose_mode is True:
            self.__logger.setLevel(DEBUG)
        else:
            self.__logger.setLevel(ERROR)
        self.__logger.addHandler(handler)

        # Fall back to the default collaborators when none are injected.
        if computable_loss is None:
            computable_loss = MeanSquaredError()
        if verificatable_result is None:
            verificatable_result = VerificateFunctionApproximation()
        if opt_params is None:
            opt_params = Adam()
            # These overrides apply only to the default optimizer; an injected
            # `opt_params` keeps whatever the caller configured.
            opt_params.weight_limit = 1e+10
            opt_params.dropout_rate = 0.0

        if cnn is None:
            cnn = ConvolutionalNeuralNetwork(
                # The `list` of `ConvolutionLayer`.
                layerable_cnn_list=layerable_cnn_list,
                # The number of epochs in mini-batch training.
                epochs=200,
                # The batch size.
                batch_size=batch_size,
                # Learning rate.
                learning_rate=learning_rate,
                # Loss function.
                computable_loss=computable_loss,
                # Optimizer.
                opt_params=opt_params,
                # Verification.
                verificatable_result=verificatable_result,
                # Pre-learned parameters.
                pre_learned_path_list=pre_learned_path_list,
                # Others.
                learning_attenuate_rate=learning_attenuate_rate,
                attenuate_epoch=attenuate_epoch
            )
            # NOTE(review): the output layer is set up only for the network
            # built here; a caller-supplied `cnn` is presumed to be fully
            # configured already -- confirm against callers.
            cnn.setup_output_layer(cnn_output_graph, pre_learned_output_path)

        self.__cnn = cnn
        self.__batch_size = batch_size
        self.__computable_loss = computable_loss
        self.__learning_rate = learning_rate
        self.__learning_attenuate_rate = learning_attenuate_rate
        self.__attenuate_epoch = attenuate_epoch
        self.__verbose_mode = verbose_mode
        self.__loss_list = []
        # Number of completed `learn_q` updates; drives learning-rate attenuation.
        self.__epoch_counter = 0

    def learn_q(self, predicted_q_arr, real_q_arr):
        '''
        Learn Q-Values.

        Computes the loss and its delta between the predicted and the real
        Q-Values, back-propagates the delta through the CNN, updates the
        parameters, and appends the loss to `loss_list`.

        The learning rate is multiplied by `learning_attenuate_rate` on every
        `attenuate_epoch`-th call of this method.

        Args:
            predicted_q_arr:    `np.ndarray` of predicted Q-Values.
            real_q_arr:         `np.ndarray` of real Q-Values.
        '''
        loss = self.__computable_loss.compute_loss(predicted_q_arr, real_q_arr)
        delta_arr = self.__computable_loss.compute_delta(predicted_q_arr, real_q_arr)
        delta_arr = self.__cnn.back_propagation(delta_arr)

        if (self.__epoch_counter + 1) % self.__attenuate_epoch == 0:
            self.__learning_rate = self.__learning_rate * self.__learning_attenuate_rate

        # NOTE(review): the second argument is kept at the constant 1 exactly
        # as before; confirm whether `optimize` expects the running epoch.
        self.__cnn.optimize(self.__learning_rate, 1)
        self.__loss_list.append(loss)

        # Advance the counter so that the attenuation schedule above can
        # actually fire; it was previously never incremented.
        self.__epoch_counter += 1

    def inference_q(self, next_action_arr):
        '''
        Inference Q-Value.

        Args:
            next_action_arr:     `np.ndarray` of action.

        Returns:
            `np.ndarray` of Q-Values.
        '''
        q_arr = self.__cnn.inference(next_action_arr)
        return q_arr

    def get_model(self):
        '''
        `object` of model as a function approximator,
        which has `cnn` whose type is
        `pydbm.cnn.convolutional_neural_network.ConvolutionalNeuralNetwork`.
        '''
        class Model(object):
            # Thin holder exposing the wrapped network as the `cnn` attribute.
            def __init__(self, cnn):
                self.cnn = cnn

        return Model(self.__cnn)

    def set_model(self, value):
        '''
        `object` of model as a function approximator.

        Raises:
            TypeError: always, because this property is read-only.
        '''
        raise TypeError("This property must be read-only.")

    model = property(get_model, set_model)

    def get_loss_list(self):
        ''' getter: `list` of the losses appended by `learn_q`. '''
        return self.__loss_list

    def set_loss_list(self, value):
        ''' setter: replace the stored `list` of losses. '''
        self.__loss_list = value

    loss_list = property(get_loss_list, set_loss_list)