# Source code for pyqlearning.annealingmodel.costfunctionable.greedy_q_learning_cost

# -*- coding: utf-8 -*-
from pyqlearning.annealingmodel.cost_functionable import CostFunctionable
from pyqlearning.qlearning.greedy_q_learning import GreedyQLearning
from copy import copy
import pandas as pd


class GreedyQLearningCost(CostFunctionable):
    '''
    Cost function for Epsilon Greedy Q-Learning
    which is-a `CostFunctionable` in relation to `AnnealingModel`.

    Each call to `compute` configures a copy of the wrapped
    `GreedyQLearning` object with candidate hyperparameters, runs its
    `learn` method, and scores the resulting Q-value table.
    '''

    # First state key handed to `learn`; `None` means `learn` is called
    # without a `state_key` argument.
    __init_state_key = None

    def __init__(
        self,
        greedy_q_learning,
        init_state_key
    ):
        '''
        Init.

        Args:
            greedy_q_learning:     is-a `GreedyQLearning`.
            init_state_key:        First state key. May be `None`.

        Raises:
            TypeError: If `greedy_q_learning` is not a `GreedyQLearning`.
        '''
        if not isinstance(greedy_q_learning, GreedyQLearning):
            raise TypeError(
                "The type of `greedy_q_learning` must be `GreedyQLearning`."
            )

        self.__greedy_q_learning = greedy_q_learning
        self.__init_state_key = init_state_key

    def compute(self, x):
        '''
        Compute cost.

        Args:
            x:    `np.ndarray` of explanatory variables, read by position:
                  `x[0]` is assigned to `epsilon_greedy_rate`,
                  `x[1]` to `alpha_value`,
                  `x[2]` to `gamma_value`,
                  and `x[3]` is converted to `int` and passed as `limit`.

        Returns:
            cost: the number of rows in the learned `q_df` divided by the
                  sum of its `q_value` column. When that sum is exactly 0,
                  `1e-4` is used as the divisor instead.
        '''
        # Work on a copy so the hyperparameter assignments below do not
        # overwrite the attributes of the object held by this instance.
        # NOTE(review): `copy` is shallow, so any mutable state inside the
        # learner is still shared with the original -- confirm that `learn`
        # does not mutate such state in place.
        q_learning = copy(self.__greedy_q_learning)
        q_learning.epsilon_greedy_rate = x[0]
        q_learning.alpha_value = x[1]
        q_learning.gamma_value = x[2]

        # `x` holds floats, so convert once and use the integer in both
        # branches (previously only the `state_key` branch converted).
        limit = int(x[3])
        if self.__init_state_key is not None:
            q_learning.learn(state_key=self.__init_state_key, limit=limit)
        else:
            q_learning.learn(limit=limit)

        q_df = q_learning.q_df
        q_sum = q_df.q_value.sum()
        # Guard against division by zero when no Q-value has been accumulated.
        denominator = q_sum if q_sum != 0 else 1e-4
        return q_df.shape[0] / denominator