environment.py (forked from Cernewein/heating-RL-agent) · 147 lines (107 loc) · 6.23 KB
import random

import numpy as np
import pandas as pd

from vars import *


class Building:
    """This class represents the building that has to be controlled. Its dynamics are modelled based on an RC analogy.
    When instantiated, it initialises the inside temperature to 21°C and the envelope temperature to 20°C, and resets
    the done and time variables.
    """

    def __init__(self, dynamic=False, eval=False):
        # Whether time-varying sun power, outside temperature and price should be used
        self.eval = eval
        self.dynamic = dynamic

        ### Initializing the temperatures
        self.inside_temperature = 21.0  # np.random.randint(19, 24)
        self.envelope_temperature = 20

        ### Selecting the starting hour for the outside temperatures based on a dataset
        if self.eval:
            # In eval mode, start with the first day of the year (January)
            self.random_day = 0
        else:
            # Otherwise select a random day in November/December for training
            self.random_day = random.randint(304, 365 - NUM_HOURS // 24 - 1) * 24
        self.ambient_temperatures = pd.read_csv('data/environment/ninja_weather_55.6838_12.5354_uncorrected.csv',
                                                header=3).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 2]
        self.ambient_temperature = self.ambient_temperatures[self.random_day]
        ### Based on the same day, choose the sun irradiation for the episode
        self.sun_powers = pd.read_csv('data/environment/ninja_weather_55.6838_12.5354_uncorrected.csv',
                                      header=3).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 3]
        self.sun_power = self.sun_powers[self.random_day]
        ### Based on the same day, choose the hourly prices for the episode
        self.prices = pd.read_csv('data/environment/2014_DK2_spot_prices.csv',
                                  header=0).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 1]
        self.price = self.prices[self.random_day]

        self.done = False
        self.time = 0
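    # Clarifying note (not in the original source): random_day is an hourly row index
    # into the CSV files; because pandas keeps the original integer index after the
    # .iloc slice, look-ups such as self.ambient_temperatures[self.random_day] address
    # the first hour of the selected episode.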

    def heat_pump_power(self, phi_e):
        """Takes an electrical power flow and converts it to a heat flow.

        :param phi_e: The electrical power
        :type phi_e: Float
        :return: Returns the heat flow as a float
        """
        return phi_e * (0.0606 * self.ambient_temperature + 2.612)
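    # Clarifying note (not in the original source): the factor
    # (0.0606 * T_ambient + 2.612) is a linear fit of the heat pump's COP as a function
    # of the ambient temperature, so the delivered heat is
    # phi_heat = COP(T_ambient) * phi_e, e.g. a COP of roughly 2.6 at 0 °C and
    # roughly 3.2 at 10 °C.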

    def step(self, action):
        """Advances the simulation by one time step.

        :param action: The chosen action - the index of the selected action from the action space.
        :type action: Integer
        :return: Returns the new state after a step, the reward for the action and the done flag
        """
        # Alternative 2R2C model including the envelope temperature (currently unused):
        # delta = 1/(R_IA*C_I) * (T_AMBIENT - self.inside_temperature) + 1/(R_IE*C_I)*(self.envelope_temperature - self.inside_temperature) + choice * self.heat_pump_power(NOMINAL_HEAT_PUMP_POWER)/C_I
        # delta_envelope = 1/(R_IE*C_E) * (self.inside_temperature - self.envelope_temperature) + 1/(R_EA*C_E) * (T_AMBIENT - self.envelope_temperature)
        # self.envelope_temperature += delta_envelope * TIME_STEP_SIZE

        # 1R1C model: heat exchange with the ambient air, heat pump input and solar gains
        delta = 1 / (R_IA * C_I) * (self.ambient_temperature - self.inside_temperature) + \
                self.heat_pump_power(NOMINAL_HEAT_PUMP_POWER * action) / C_I + A_w * self.sun_power / C_I
        self.inside_temperature += delta * TIME_STEP_SIZE

        r = self.reward(action)
        self.time += 1

        if self.dynamic:
            # Updating the ambient temperature, sun power and price for the current hour
            self.ambient_temperature = self.ambient_temperatures[self.random_day + (self.time * TIME_STEP_SIZE) // 3600]
            self.sun_power = self.sun_powers[self.random_day + (self.time * TIME_STEP_SIZE) // 3600]
            self.price = self.prices[self.random_day + (self.time * TIME_STEP_SIZE) // 3600]

        if self.time >= NUM_TIME_STEPS:
            self.done = True

        return [self.inside_temperature, self.ambient_temperature, self.sun_power, self.price], r, self.done
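    # Clarifying note (not in the original source): the update above is a forward-Euler
    # discretisation of the 1R1C model
    #   C_I * dT_i/dt = (T_a - T_i) / R_IA + Q_hp + A_w * Phi_sun
    # with Q_hp = COP(T_a) * NOMINAL_HEAT_PUMP_POWER * action, integrated with step size
    # TIME_STEP_SIZE (in seconds, judging by the /3600 conversions elsewhere in the file).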

    def reward(self, action):
        """Returns the reward received for the chosen action and the transition to the next state.

        :param action: The selected action
        :return: Returns the reward for that action
        """
        # Comfort penalty for leaving the [T_MIN, T_MAX] band, only applied while the
        # ambient temperature is at most T_MAX
        if self.ambient_temperature <= T_MAX:
            penalty = np.maximum(0, self.inside_temperature - T_MAX) + np.maximum(0, T_MIN - self.inside_temperature)
            penalty *= COMFORT_PENALTY
        else:
            penalty = 0
        # Energy cost: electrical energy consumed over the time step, weighted by the spot
        # price and the PRICE_PENALTY factor
        reward = -action * PRICE_PENALTY * NOMINAL_HEAT_PUMP_POWER / (1e6) * self.price * TIME_STEP_SIZE / 3600 - penalty
        return reward

    def reset(self):
        """Resets the attributes of the building.

        :return: Returns the reset inside temperature, ambient temperature, sun power and price
        """
        self.inside_temperature = 21
        if self.eval:
            self.random_day = 0
        else:
            self.random_day = random.randint(304, 365 - NUM_HOURS // 24 - 1) * 24
        ## Resetting the ambient temperatures
        self.ambient_temperatures = pd.read_csv('data/environment/ninja_weather_55.6838_12.5354_uncorrected.csv',
                                                header=3).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 2]
        self.ambient_temperature = self.ambient_temperatures[self.random_day]
        ## Resetting the sun power
        self.sun_powers = pd.read_csv('data/environment/ninja_weather_55.6838_12.5354_uncorrected.csv',
                                      header=3).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 3]
        self.sun_power = self.sun_powers[self.random_day]
        ## Resetting the prices
        self.prices = pd.read_csv('data/environment/2014_DK2_spot_prices.csv',
                                  header=0).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 1]
        self.price = self.prices[self.random_day]
        self.done = False
        self.time = 0
        return [self.inside_temperature, self.ambient_temperature, self.sun_power, self.price]
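

# Minimal usage sketch (not part of the original file). It assumes that vars.py defines
# consistent NUM_HOURS, NUM_TIME_STEPS, TIME_STEP_SIZE, R_IA, C_I, A_w,
# NOMINAL_HEAT_PUMP_POWER, T_MIN, T_MAX, COMFORT_PENALTY and PRICE_PENALTY, and that the
# CSV files referenced above are available under data/environment/.
if __name__ == "__main__":
    env = Building(dynamic=True)
    state = env.reset()
    total_reward = 0.0
    done = False
    while not done:
        # Toy bang-bang policy: run the heat pump at full power below 21 °C, otherwise
        # switch it off (the action is used as a modulation factor of the nominal power).
        action = 1.0 if state[0] < 21.0 else 0.0
        state, reward, done = env.step(action)
        total_reward += reward
    print("Episode finished after", env.time, "steps, total reward:", total_reward)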