6 Implementation.
Finally, using the Bellman equation
\[ v(x) = \max_{a\in \mathcal{A}(x)} \left\{E\left[R(x,a,\xi) + \beta\, v(f(x,a,\xi))\right]\right\} \tag{6.1}\]
together with the following initial conditions:
- \(X_0 = 60\) (initial inventory),
- \(P_V = 20\) (sale price), \(P_C = 10\) (unit cost),
- \(\eta = 0.1\) (loss factor),
and considering a random demand \(\xi_t \sim \text{Geo}(p)\) with \(p = 0.05\), we run the following code to solve the problem.
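In practice, the script approximates the fixed point of (6.1) by value iteration, replacing the expectation with a Monte Carlo average over a sample of demand draws \(\xi_1,\dots,\xi_N\) (here \(N = 200\), with 25 iterations and cubic-spline interpolation of \(v_k\) between the grid points):
\[ v_{k+1}(x) = \max_{a\in \mathcal{A}(x)} \left\{ \frac{1}{N}\sum_{i=1}^{N} \left[ R(x,a,\xi_i) + \beta\, v_{k}(f(x,a,\xi_i)) \right] \right\} \]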
import numpy as np
import pandas as pd
from scipy.interpolate import CubicSpline

RV_P = 0.05      # Random demand parameter
REW_SALE = 20    # Sale price
REW_COST = 10    # Unit cost
DYN_ETA = 0.1    # Loss factor
M_K = 100        # Maximum inventory capacity
X_0 = 60         # Initial inventory
PERIOD = 100     # Number of simulated periods

def demmand(p):
    """
    Define the random demand in the inventory model.
    Params:
        p (float): Probability parameter.
    Return:
        d (int): Random demand value.
    """
    d = np.random.geometric(p)
    return d
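# Note: np.random.geometric draws k >= 1 with P(k) = (1 - p)**(k - 1) * p,
# so the mean demand per period is 1/p = 20 for p = 0.05.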
def admisible_actions(x, k):
    """
    Calculate the admissible actions for each state x.
    Params:
        x (int): state value.
        k (int): max inventory capacity.
    Returns:
        v (list): admissible action set for the state x.
    """
    v = list(range(k - x + 1))
    return v
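# Example: with capacity k = 100 and stock x = 98, admisible_actions(98, 100)
# returns [0, 1, 2], i.e. any order that does not overflow the warehouse.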
def reward_function(x, a, d, p_v, p_c):
    """
    Reward function: alternative cost function.
    Params:
        x (int): state value.
        a (int): action value.
        d (int): demand value.
        p_v (float): sale price.
        p_c (float): unit cost.
    Return:
        r (float): reward for x, a and d with parameters p_v and p_c.
    """
    lost_state = 1  # Activate (1) or deactivate (0) the additional lost-sales cost.
    lost_cost = max(d - x - a, 0)
    c = a + lost_state * lost_cost
    r = p_v * min(x + a, d) - p_c * c
    return r
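# Example: with x = 10, a = 5, d = 20, p_v = 20, p_c = 10, the unmet demand is
# max(20 - 10 - 5, 0) = 5, so c = 5 + 1 * 5 = 10 and
# r = 20 * min(15, 20) - 10 * 10 = 300 - 100 = 200.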
def dynamic(x, a, d, eta):
    """
    Calculate the next state.
    Params:
        x (int): state value.
        a (int): action value.
        d (int): demand value.
        eta (float): loss parameter.
    Return:
        xp1 (int): next state.
    """
    xp1 = x + a - d - np.floor(eta * x)
    xp1 = max(xp1, 0)
    return xp1
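# Example: dynamic(60, 10, 20, 0.1) = max(60 + 10 - 20 - floor(6.0), 0) = 44.0;
# besides the demand, floor(eta * x) units of the current stock spoil each period.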
def expected_reward(x, a, d_v):
    """
    Calculate the expected reward.
    Params:
        x (int): state value.
        a (int): action value.
        d_v (list): random demand vector.
    Return:
        data (list): rewards per demand draw (the mean is taken afterwards).
    """
    p_v = REW_SALE
    p_c = REW_COST
    data = [reward_function(x, a, d, p_v, p_c) for d in d_v]
    return data

def expected_vf(vf, x_0, a, d_v):
    """
    Calculate the expected value of the function v at the next state.
    Params:
        vf (function): interpolation of the current value function.
        x_0 (int): state value.
        a (int): action value.
        d_v (list): random demand sample.
    Return:
        vf_d (int): Monte Carlo estimate of E[v(f(x, a, xi))].
    """
    vf_d = [float(vf(dynamic(x_0, a, d, DYN_ETA))) for d in d_v]
    vf_d = int(np.array(vf_d).mean())
    return vf_d
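# Note: int(...) truncates the Monte Carlo mean to an integer. This keeps the
# iterates integer-valued but discards the fractional part, which slightly
# coarsens the approximation of E[v(f(x, a, xi))].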
STATES_SET = list(range(M_K + 1))

def evol_vk(vk, hk, beta, der):
    """
    One value-iteration step: compute v_{k+1} and the greedy policy h_{k+1}.
    Params:
        vk (list): current value function on STATES_SET.
        hk (list): current policy on STATES_SET.
        beta (float): discount factor.
        der (list): random demand sample.
    Return:
        vk (list): updated value function.
        hk (list): updated greedy policy.
    """
    for u, x_k in enumerate(STATES_SET):
        a_x = admisible_actions(x_k, M_K)
        # print("Admissible actions", a_x)
        cost_ax = []
        expec_ax = []
        v_function = CubicSpline(STATES_SET, vk)
        for action in a_x:
            cost = [reward_function(x_k, action, d, REW_SALE, REW_COST) for d in der]
            cost = int(np.array(cost).mean())
            cost_ax.append(cost)
            e_vf = expected_vf(v_function, x_k, action, der)
            expec_ax.append(e_vf)
        v_cost = np.array(cost_ax)
        v_vf = np.array(expec_ax)
        v_sum = v_cost + beta * v_vf
        v_max = v_sum.max()
        vk[u] = v_max
        hk[u] = v_sum.argmax()
    return vk, hk
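# Note: vk is updated in place and the spline is rebuilt for every state, so
# states later in the sweep already see the updated values of earlier states
# (a Gauss-Seidel-style variant of value iteration).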
final_df = pd.DataFrame()

# Ten replications: each one re-solves the model with a fresh demand sample
# and then simulates a trajectory under the resulting policy.
for ks in range(10):
    print(ks)
    demmand_vector = [demmand(RV_P) for _ in range(200)]  # Monte Carlo sample
    v_0 = list(STATES_SET)  # v_0(x) = x
    h_0 = np.zeros(len(v_0))

    v_mem = [v_0]
    h_mem = [h_0]

    BPAR = 0.9  # Discount factor beta
    for kt in range(25):
        v_1, h_1 = evol_vk(v_0, h_0, beta=BPAR, der=demmand_vector)
        v_mem.append(v_1)
        h_mem.append(h_1)
        # print(f"{kt+1} - {kt}", np.array(v_1) - np.array(v_mem[kt]))
        v_0 = v_1.copy()
        h_0 = h_1.copy()

    # Interpolate the last policy to evaluate it at arbitrary states.
    test_eval = CubicSpline(STATES_SET, h_mem[-1])

    df = pd.DataFrame(index=list(range(PERIOD)),
                      columns=["state", "action", "demmand", "r"])

    x_hist = [X_0]
    a_hist = []
    d_hist = []
    for q in range(PERIOD):
        a = int(test_eval(x_hist[q]))
        d = demmand(RV_P)
        r = reward_function(x_hist[q], a, d, REW_SALE, REW_COST)
        df.loc[q] = [x_hist[q], a, d, r]
        xp1 = dynamic(x_hist[q], a, d, DYN_ETA)
        x_hist.append(xp1)
        a_hist.append(a)
        d_hist.append(d)
    df['SAMPLE'] = ks
    if ks == 0:
        final_df = df
    else:
        final_df = pd.concat([final_df, df])

# Save the simulated data to an Excel file.
final_df.to_excel(f"S_{PERIOD}_({M_K, DYN_ETA})_{RV_P}_({REW_SALE, REW_COST}).xlsx")
Code for the visualization.
# Required libraries
import pandas as pd
import matplotlib.pyplot as plt

# Read the generated data.
data = pd.read_excel("S_100_((100, 0.1))_0.05_((20, 10)).xlsx", index_col=0)
print(data)

# Produce the requested plots.
samples = data['SAMPLE'].unique()
figure_x, ax0 = plt.subplots(figsize=(6, 4))    # Inventory level plot.
figure_r, ax1 = plt.subplots(figsize=(6, 4))    # Per-stage reward plot.
figure_acr, ax2 = plt.subplots(figsize=(6, 4))  # Accumulated reward plot.
figure_act, ax3 = plt.subplots(figsize=(6, 4))  # Policy (actions) plot.

x = 0
r = 0
acr = 0
for w, s in enumerate(samples[:3]):
    data_s = data[data['SAMPLE'] == s]
    ax0.plot(data_s['state'], lw=0.7, label=f"l_{s}")
    ax0.set_xlabel("Stages")
    ax0.set_ylabel("Level")
    ax1.plot(data_s['r'], lw=0.7, label=f"l_{s}")
    ax1.set_ylabel("Reward")
    ax1.set_xlabel("Stages")
    ax2.plot(data_s['r'].cumsum(), lw=0.7, label=f"l_{s}")
    ax2.set_xlabel("Stages")
    ax2.set_ylabel("Acc. Reward")
    ax3.plot(data_s['action'], lw=0.7, label=f"l_{s}")
    ax3.set_xlabel("Stages")
    ax3.set_ylabel("Action")
    if w == 0:
        x = data_s['state']
        r = data_s['r']
        acr = data_s['r'].cumsum()
    else:
        x += data_s['state']
        r += data_s['r']
        acr += data_s['r'].cumsum()

ax0.plot(x / 3, lw=0.7, color="black", label="x mean")
ax1.plot(r / 3, lw=0.7, color="black", label="r mean")
ax2.plot(acr / 3, lw=0.7, color="black", label="acc. r mean")

figure_x.legend()
figure_x.tight_layout()
figure_x.savefig("Data_x.png")
figure_r.legend()
figure_r.tight_layout()
figure_r.savefig("Data_r.png")
figure_acr.legend(loc="upper center")
figure_acr.tight_layout()
figure_acr.savefig("Data_acr.png")
figure_act.legend()
figure_act.tight_layout()
figure_act.savefig("Data_act.png")
plt.show()
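The running sums above average only the first three samples by hand. A sketch of the same idea over all ten samples with pandas, assuming the same data frame (whose index repeats the stage number once per sample):

mean_by_stage = data.groupby(level=0)[["state", "action", "r"]].mean()
print(mean_by_stage.head())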