Source code for pyqlearning.annealingmodel.costfunctionable.boltzmann_q_learning_cost

# -*- coding: utf-8 -*-
from pyqlearning.annealingmodel.cost_functionable import CostFunctionable
from pyqlearning.qlearning.boltzmann_q_learning import BoltzmannQLearning
from copy import copy
import pandas as pd


class GreedyQLearningCost(CostFunctionable):
    '''
    Cost function for Q-Learning which is-a `CostFunctionable`
    in relation to `AnnealingModel`.

    The wrapped learner is validated against `BoltzmannQLearning`,
    the only learner type imported by this module.

    NOTE(review): the class and parameter names say "greedy" while the
    type check and module name say "Boltzmann" -- confirm which is intended.
    '''

    # First state key handed to `learn()`; `None` means "let the learner decide".
    __init_state_key = None

    def __init__(
        self,
        greedy_q_learning,
        init_state_key
    ):
        '''
        Init.

        Args:
            greedy_q_learning:  is-a `BoltzmannQLearning`.
            init_state_key:     First state key. May be `None`.

        Raises:
            TypeError: `greedy_q_learning` is not a `BoltzmannQLearning`.
        '''
        # Validate the argument that was actually passed in. The previous body
        # referenced an undefined name here and always raised `NameError`.
        if not isinstance(greedy_q_learning, BoltzmannQLearning):
            raise TypeError(
                "greedy_q_learning must be an instance of BoltzmannQLearning."
            )

        # Stored under the same private name that `compute()` reads.
        self.__greedy_q_learning = greedy_q_learning
        self.__init_state_key = init_state_key

    def compute(self, x):
        '''
        Compute cost.

        Args:
            x:    `np.ndarray` of explanatory variables.
                  `x[0]` is assigned to the learner's `alpha_value`,
                  `x[1]` to its `gamma_value`, and `x[2]` (truncated to
                  `int`) is passed to `learn()` as `limit`.

        Returns:
            cost: the number of rows in the learner's `q_df` divided by the
                  sum of its `q_value` column, or divided by `1e-4` when
                  that sum is exactly zero.
        '''
        # Work on a shallow copy so the hyperparameters assigned below are not
        # rebound on the stored learner.
        # NOTE(review): `copy` is shallow -- mutable state held by the learner
        # may still be shared with the original; confirm this is acceptable.
        q_learning = copy(self.__greedy_q_learning)
        q_learning.alpha_value = x[0]
        q_learning.gamma_value = x[1]

        # Both branches now receive an integer limit; previously only the
        # branch with an initial state key converted it.
        limit = int(x[2])
        if self.__init_state_key is not None:
            q_learning.learn(state_key=self.__init_state_key, limit=limit)
        else:
            q_learning.learn(limit=limit)

        row_count = q_learning.q_df.shape[0]
        q_sum = q_learning.q_df.q_value.sum()
        if q_sum != 0:
            cost = row_count / q_sum
        else:
            # Avoid division by zero when no Q-value has been accumulated.
            cost = row_count / 1e-4

        return cost