This repository has been archived by the owner on Jan 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
113 lines (89 loc) · 2.24 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from pynput.keyboard import Key
import numpy as np
import threading
import time
from utils.agent import Agent
from utils.keyboard import Keyboard
from utils.reader import Reader
# env vars
# Module-level placeholder for the orb distance. main() assigns its own local
# orbDist without declaring it global, so this value is never updated there.
orbDist = None
# screen reader
# Configure the Reader before its loop is launched: showWindow and printDebug
# are switched off, monitor 2 is selected, then calibrate() is run.
# NOTE(review): calibrate() presumably depends on the selected monitor, so
# keep selectMonitor() ahead of it — confirm against utils.reader.
reader = Reader()
reader.showWindow = False
reader.printDebug = False
reader.selectMonitor(2)
reader.calibrate()
# Thread that will run reader.start; it is only created here and is started
# later, inside main().
readingThread = threading.Thread(target=reader.start)
# agent
# Learning agent built from a single nested settings dict.
# NOTE(review): the three groups look like Adam optimiser settings, network
# layer sizes and replay-buffer settings, with 'gamma' as a discount factor
# and 'tau' as a target-update rate — confirm against utils.agent before
# relying on these meanings.
agent = Agent({
    'adamConfig': {
        'stepSize': 1e-3,
        'betaM': 0.9,
        'betaV': 0.999,
        'epsilon': 0.001,
    },
    'nnConfig': {
        'stateCount': 12,
        'hiddenUnitCount': [128, 64, 16],
        'actionCount': 4,
    },
    'rbConfig': {
        'rbSize': 50000,
        'batchSize': 8,
        'replayUpdatePerStep': 4,
    },
    'gamma': 0.95,
    'tau': 0.001,
})
# keyboard controller
# Lookup from action index to the key handed to the Keyboard controller.
# main() passes the agent's chosen action to controller.apply(), and applies
# index 4 (space) itself when starting and after every game over.
controller = Keyboard({
    0: 'w',
    1: 's',
    2: 'a',
    3: 'd',
    4: Key.space,
})
def main():
    """Run the training loop until the process is interrupted.

    Starts the screen-reader thread, prints a short countdown so the user can
    move the cursor to the game window, then loops forever:

    * On game over the episode is closed with ``agent.end(-100)``, the episode
      number and ``agent.rSum`` are printed, ``agent.saveNN`` is called every
      250 episodes, and the game is restarted by applying action 4.
    * Otherwise, whenever the observed state or orb distance differs from the
      previous step, a reward is computed (10 for a new orb, 1 for getting
      closer, -1 otherwise), passed to ``agent.step``, and the returned action
      is applied through the keyboard controller.

    Uses the module-level ``reader``, ``readingThread``, ``agent`` and
    ``controller``. None of them is rebound here, so no ``global``
    declaration is required. This function never returns.
    """
    readingThread.start()

    # print ready up prompt
    print('move your cursor to window')
    time.sleep(1)
    print('training starts in\n3')
    time.sleep(1)
    print('2')
    time.sleep(1)
    print('1')

    # start training: action 4 starts the game, then take the first reading
    controller.apply(4)
    state, orbDist, newOrb, gameover = reader.getState()
    agent.start(state)
    eps = 0

    while True:
        tempState, newOrbDist, newOrb, gameover = reader.getState()

        # reset if gameover
        if gameover:
            agent.end(-100)
            print('Eps: {} | reward: {}'.format(eps, agent.rSum))
            eps += 1
            if eps % 250 == 0:
                agent.saveNN(eps)
            # pause, restart the game, then pause again before re-reading
            time.sleep(0.5)
            controller.apply(4)
            time.sleep(0.1)
            state, orbDist, newOrb, gameover = reader.getState()
            agent.start(state)
            continue

        # Only step the agent when the reading actually changed; otherwise
        # poll the reader again.
        changed = not np.array_equal(state, tempState) or orbDist != newOrbDist
        if not changed:
            continue

        # step controller
        state = tempState
        if newOrb:
            reward = 10
        elif orbDist > newOrbDist:
            reward = 1
        else:
            reward = -1
        orbDist = newOrbDist

        action = agent.step(reward, state)
        controller.apply(action)
# Start training only when this file is executed as a script.
if __name__ == "__main__":
    main()