Skip to content

Commit

Permalink
fix: backward induction algo
Browse files Browse the repository at this point in the history
  • Loading branch information
killian31 committed Mar 7, 2024
1 parent 8bc8b4f commit e109e5b
Showing 1 changed file with 12 additions and 92 deletions.
104 changes: 12 additions & 92 deletions pages/Parking_Problem_Solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
st.set_page_config(page_title="Parking Problem Solver", page_icon="🚗")
problem_explanation = """
In the context of Markov Decision Processes, the parking problem can be described as follows:
Expand Down Expand Up @@ -91,11 +92,9 @@
st.title("Parking Problem Solver")
# Display the problem explanation
st.markdown(problem_explanation)
# Initialize the parking lot with a given number of slots and proportion of being taken
def initialize_parking_lot(N, p):
slots = ["F"] * N
for i in random.sample(range(N), int(p * N)):
Expand All @@ -108,49 +107,13 @@ def rho(i, N, p_start=0.9, p_end=0.1):
return p_start - ((i - 1) / (N - 1)) * (p_start - p_end)
# Compute the optimal policy using value iteration
def value_iteration(
    parking_lot, cost_of_continuing, p_start=0.9, p_end=0.1, reward_func="i"
):
    """Compute an optimal park/continue policy by backward value iteration.

    Sweeps the lot from the last slot to the first, comparing the reward of
    parking now against the expected value of driving on one more slot.

    Args:
        parking_lot: list of "F"/"T" flags, one per slot ("F" = free).
        cost_of_continuing: cost paid each time the driver moves one slot on.
        p_start: free-probability of the first slot (linear schedule start).
        p_end: free-probability of the last slot (linear schedule end).
        reward_func: "i", "2i" or "i^2" — reward for parking in slot i (1-based).

    Returns:
        np.ndarray of ints, one per slot: 1 = park here (if free), 0 = continue.

    Raises:
        ValueError: if reward_func is not one of the recognised names.
    """
    N = len(parking_lot)
    V = np.zeros(N + 1)  # V[N] = 0: no value past the end of the lot
    policy = np.zeros(N, dtype=int)  # 0 = continue, 1 = park

    for i in range(N - 1, -1, -1):
        if parking_lot[i] == "F":
            if i < N - 1:  # not at the last slot
                # Free-probability of the NEXT slot (i + 1 in 1-based terms),
                # same linear schedule as the module-level `rho` helper,
                # inlined here. FIX: the old code passed the 0-based i, which
                # yields a "probability" above p_start (can exceed 1) at i=0;
                # backward_induction in this file uses rho(i + 1, ...).
                rho_next = p_start - (i / (N - 1)) * (p_start - p_end)
                # Expected value of continuing: the driving cost enters only
                # through this expectation. FIX: the old code subtracted
                # cost_of_continuing here AND again at the comparison below,
                # double-counting it.
                continue_value = rho_next * V[i + 1] + (1 - rho_next) * (
                    V[i + 1] - cost_of_continuing
                )
            else:
                continue_value = 0  # can't continue from the last slot
            if reward_func == "i":
                park_value = i + 1  # reward for parking now (1-based slot)
            elif reward_func == "2i":
                park_value = 2 * (i + 1)
            elif reward_func == "i^2":
                park_value = (i + 1) ** 2
            else:
                raise ValueError("Invalid reward function")
            V[i] = max(park_value, continue_value)
            policy[i] = int(park_value >= continue_value)
        else:
            # Slot is taken: forced to continue, paying the cost.
            V[i] = V[i + 1] - cost_of_continuing
            policy[i] = 0

    return policy
def backward_induction(N, c, p_start=0.9, p_end=0.1, reward_func="i"):
V_free = np.zeros(N + 1) # Value when the slot is free
V_taken = np.zeros(N + 1) # Value when the slot is taken
policy = [None] * N # Optimal action for each slot
V_free = np.zeros(N + 1)
V_taken = np.zeros(N + 1)
policy = [None] * N
# Boundary conditions
V_free[N] = N # Value of parking in the last free slot
V_taken[N] = 0 # Value of the last slot being taken
V_free[N] = N
V_taken[N] = 0
for i in range(N - 1, -1, -1):
prob_free_next = rho(i + 1, N, p_start, p_end)
Expand Down Expand Up @@ -244,9 +207,6 @@ def plot_parking_lot(parking_lot, policy):
# Guard: a lot must have been generated into session state before solving.
if "parking_lot" not in st.session_state:
    st.error("Please generate a parking lot first.")
    st.stop()
# Solve the finite-horizon problem by backward induction.
# (Removed dead commented-out call to the superseded value_iteration solver.)
values, policy = backward_induction(
    N, cost_of_continuing, p_start, p_end, reward_func
)
Expand All @@ -255,6 +215,7 @@ def plot_parking_lot(parking_lot, policy):
''')



problem_explanation = """
In the context of Markov Decision Processes, the parking problem can be described as follows:
Expand Down Expand Up @@ -331,11 +292,9 @@ def plot_parking_lot(parking_lot, policy):

st.title("Parking Problem Solver")

# Display the problem explanation
st.markdown(problem_explanation)


# Initialize the parking lot with a given number of slots and proportion of being taken
def initialize_parking_lot(N, p):
slots = ["F"] * N
for i in random.sample(range(N), int(p * N)):
Expand All @@ -348,49 +307,13 @@ def rho(i, N, p_start=0.9, p_end=0.1):
return p_start - ((i - 1) / (N - 1)) * (p_start - p_end)


# Compute the optimal policy using value iteration
def value_iteration(
    parking_lot, cost_of_continuing, p_start=0.9, p_end=0.1, reward_func="i"
):
    """Compute an optimal park/continue policy by backward value iteration.

    Sweeps the lot from the last slot to the first, comparing the reward of
    parking now against the expected value of driving on one more slot.

    Args:
        parking_lot: list of "F"/"T" flags, one per slot ("F" = free).
        cost_of_continuing: cost paid each time the driver moves one slot on.
        p_start: free-probability of the first slot (linear schedule start).
        p_end: free-probability of the last slot (linear schedule end).
        reward_func: "i", "2i" or "i^2" — reward for parking in slot i (1-based).

    Returns:
        np.ndarray of ints, one per slot: 1 = park here (if free), 0 = continue.

    Raises:
        ValueError: if reward_func is not one of the recognised names.
    """
    N = len(parking_lot)
    V = np.zeros(N + 1)  # V[N] = 0: no value past the end of the lot
    policy = np.zeros(N, dtype=int)  # 0 = continue, 1 = park

    for i in range(N - 1, -1, -1):
        if parking_lot[i] == "F":
            if i < N - 1:  # not at the last slot
                # Free-probability of the NEXT slot (i + 1 in 1-based terms),
                # same linear schedule as the module-level `rho` helper,
                # inlined here. FIX: the old code passed the 0-based i, which
                # yields a "probability" above p_start (can exceed 1) at i=0;
                # backward_induction in this file uses rho(i + 1, ...).
                rho_next = p_start - (i / (N - 1)) * (p_start - p_end)
                # Expected value of continuing: the driving cost enters only
                # through this expectation. FIX: the old code subtracted
                # cost_of_continuing here AND again at the comparison below,
                # double-counting it.
                continue_value = rho_next * V[i + 1] + (1 - rho_next) * (
                    V[i + 1] - cost_of_continuing
                )
            else:
                continue_value = 0  # can't continue from the last slot
            if reward_func == "i":
                park_value = i + 1  # reward for parking now (1-based slot)
            elif reward_func == "2i":
                park_value = 2 * (i + 1)
            elif reward_func == "i^2":
                park_value = (i + 1) ** 2
            else:
                raise ValueError("Invalid reward function")
            V[i] = max(park_value, continue_value)
            policy[i] = int(park_value >= continue_value)
        else:
            # Slot is taken: forced to continue, paying the cost.
            V[i] = V[i + 1] - cost_of_continuing
            policy[i] = 0

    return policy


def backward_induction(N, c, p_start=0.9, p_end=0.1, reward_func="i"):
V_free = np.zeros(N + 1) # Value when the slot is free
V_taken = np.zeros(N + 1) # Value when the slot is taken
policy = [None] * N # Optimal action for each slot
V_free = np.zeros(N + 1)
V_taken = np.zeros(N + 1)
policy = [None] * N

# Boundary conditions
V_free[N] = N # Value of parking in the last free slot
V_taken[N] = 0 # Value of the last slot being taken
V_free[N] = N
V_taken[N] = 0

for i in range(N - 1, -1, -1):
prob_free_next = rho(i + 1, N, p_start, p_end)
Expand Down Expand Up @@ -484,9 +407,6 @@ def plot_parking_lot(parking_lot, policy):
# Guard: a lot must have been generated into session state before solving.
if "parking_lot" not in st.session_state:
    st.error("Please generate a parking lot first.")
    st.stop()
# Solve the finite-horizon problem by backward induction.
# (Removed dead commented-out call to the superseded value_iteration solver.)
values, policy = backward_induction(
    N, cost_of_continuing, p_start, p_end, reward_func
)
Expand Down

0 comments on commit e109e5b

Please sign in to comment.