import gym
import time
import random
import numpy as np
import tensorflow as tf
from skimage.color import rgb2gray
from skimage.transform import resize
from tensorflow.keras.layers import Conv2D, Dense, Flatten


# Neural network that takes the state as input and outputs Q-values
class DQN(tf.keras.Model):
    def __init__(self, action_size, state_size):
        super(DQN, self).__init__()
        self.conv1 = Conv2D(32, (8, 8), strides=(4, 4), activation='relu',
                            input_shape=state_size)
        self.conv2 = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')
        self.conv3 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')
        self.flatten = Flatten()
        self.fc = Dense(512, activation='relu')
        self.fc_out = Dense(action_size)

    def call(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.flatten(x)
        x = self.fc(x)
        q = self.fc_out(x)
        return q
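
# Shape check (illustrative): the network expects a batch of stacked frames of
# shape (batch, 84, 84, 4) and returns one Q-value per action, e.g.
#   net = DQN(action_size=3, state_size=(84, 84, 4))
#   q = net(np.zeros((1, 84, 84, 4), dtype=np.float32))  # q.shape == (1, 3)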

# DQN agent for the Breakout example
class DQNAgent:
    def __init__(self, action_size, state_size, model_path):
        self.render = False

        # Sizes of the state and action spaces
        self.state_size = state_size
        self.action_size = action_size

        # Small exploration rate kept during testing
        self.epsilon = 0.02

        # Build the model and load the trained weights
        self.model = DQN(action_size, state_size)
        self.model.load_weights(model_path)

    # Select an action with an epsilon-greedy policy
    def get_action(self, history):
        history = np.float32(history / 255.0)
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_value = self.model(history)
            return np.argmax(q_value[0])


# Convert a raw frame to an 84x84 grayscale image
def pre_processing(observe):
    processed_observe = np.uint8(
        resize(rgb2gray(observe), (84, 84), mode='constant') * 255)
    return processed_observe
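
# Illustrative: a raw Breakout frame from gym is an RGB array of shape
# (210, 160, 3); pre_processing turns it into an (84, 84) uint8 image, e.g.
#   frame = np.zeros((210, 160, 3), dtype=np.uint8)
#   assert pre_processing(frame).shape == (84, 84)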


if __name__ == "__main__":
    # Environment setup
    env = gym.make("BreakoutDeterministic-v4")
    render = True

    # Create the agent for testing
    state_size = (84, 84, 4)
    action_size = 3
    model_path = './save_model/trained/model'
    agent = DQNAgent(action_size, state_size, model_path)

    # Dictionary that maps the agent's 3 actions onto the environment's
    # action indices, removing the unnecessary no-op action
    action_dict = {0: 1, 1: 2, 2: 3, 3: 3}

    num_episode = 10
    for e in range(num_episode):
        done = False
        dead = False

        score, start_life = 0, 5
        # Reset the environment
        observe = env.reset()

        # Hold still (fire only) for a randomly chosen number of initial frames
        for _ in range(random.randint(1, 30)):
            observe, _, _, _ = env.step(1)

        # Pre-process the frame and stack 4 copies as the initial input
        state = pre_processing(observe)
        history = np.stack([state, state, state, state], axis=2)
        history = np.reshape([history], (1, 84, 84, 4))

        while not done:
            if render:
                env.render()
                time.sleep(0.05)

            # Select an action from the most recent history
            action = agent.get_action(history)
            # Environment actions -- 1: stay (fire), 2: right, 3: left
            real_action = action_dict[action]

            # After losing a life, fire to put the ball back in play
            if dead:
                action, real_action, dead = 0, 1, False

            # Advance the environment one time step with the chosen action
            observe, reward, done, info = env.step(real_action)

            # Pre-process the new frame and append it to the history
            next_state = pre_processing(observe)
            next_state = np.reshape([next_state], (1, 84, 84, 1))
            next_history = np.append(next_state, history[:, :, :, :3], axis=3)

            if start_life > info['ale.lives']:
                dead, start_life = True, info['ale.lives']

            score += reward

            if dead:
                # Losing a life breaks the frame sequence, so rebuild the
                # history from four copies of the current frame
                history = np.concatenate((next_state, next_state,
                                          next_state, next_state), axis=3)
            else:
                history = next_history

            if done:
                # Log the test result for each episode
                print("episode: {:3d} | score : {:4.1f}".format(e, score))