import numpy as np
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from utils import *
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader


class BCModel(nn.Module):
    def __init__(self, in_size, out_size):
        super(BCModel, self).__init__()
        ######### Your code starts here #########
        # We want to define and initialize the weights & biases of the neural network.
        # - in_size is dim(O)
        # - out_size is dim(A) = 2
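        #
        # A minimal sketch (an assumed architecture, not the required one): a small
        # fully connected network mapping a batch of observations to actions.
        self.net = nn.Sequential(
            nn.Linear(in_size, 64),
            nn.Tanh(),
            nn.Linear(64, 64),
            nn.Tanh(),
            nn.Linear(64, out_size),
        )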
        ########## Your code ends here ##########

    def forward(self, x):
        ######### Your code starts here #########
        # We want to perform a forward pass of the network. Using the weights and
        # biases, this function should give the network output for x, where:
        # - x is a (?, |O|) tensor that holds a batch of observations.
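        #
        # A minimal sketch matching the example layers above: apply the network to
        # the batch of observations, returning a (?, dim(A)) tensor of actions.
        return self.net(x)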
        ########## Your code ends here ##########


def run_training(data, args):
    """
    Trains a feedforward neural network to imitate the expert actions (behavior cloning).
    """
    params = {
        "train_batch_size": 4096,
    }
    in_size = data["x_train"].shape[-1]
    out_size = data["y_train"].shape[-1]

    bc_model = BCModel(in_size, out_size)
    if args.restore:
        ckpt_path = (
            "./policies/" + args.scenario.lower() + "_" + args.goal.lower() + "_IL"
        )
        bc_model.load_state_dict(torch.load(ckpt_path))

    optimizer = optim.Adam(bc_model.parameters(), lr=args.lr)

    def train_step(x, y):
        ######### Your code starts here #########
        """
        We want to perform a single training step (for one batch):
            1. Make a forward pass through the model.
            2. Calculate the loss for the output of the forward pass.
        We want to compute the loss between y_est and y, where
            - y_est is the output of the network for a batch of observations,
            - y is the actions the expert took for the corresponding batch of observations.
        At the end, your code should return the scalar loss value.
        HINT: Remember, you can penalize steering (0th dimension) and throttle (1st dimension) unequally.
        """
        ########## Your code ends here ##########
        return loss

    # load dataset
    dataset = TensorDataset(
        torch.Tensor(data["x_train"]), torch.Tensor(data["y_train"])
    )
    dataloader = DataLoader(dataset, batch_size=params["train_batch_size"])

    # run training
    bc_model.train()
    for epoch in range(args.epochs):
        epoch_loss = 0.0
        for x, y in dataloader:
            optimizer.zero_grad()
            batch_loss = train_step(x, y)
            batch_loss.backward()
            optimizer.step()
            epoch_loss += batch_loss.item()
        epoch_loss /= len(dataloader)
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")

    ckpt_path = "./policies/" + args.scenario.lower() + "_" + args.goal.lower() + "_IL"
    torch.save(bc_model.state_dict(), ckpt_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--scenario",
        type=str,
        help="intersection, circularroad, lanechange",
        default="intersection",
    )
    parser.add_argument(
        "--goal",
        type=str,
        help="left, straight, right, inner, outer, all",
        default="all",
    )
    parser.add_argument(
        "--epochs", type=int, help="number of epochs for training", default=1000
    )
    parser.add_argument(
        "--lr", type=float, help="learning rate for Adam optimizer", default=5e-3
    )
    parser.add_argument("--restore", action="store_true", default=False)
    args = parser.parse_args()

    maybe_makedirs("./policies")
    data = load_data(args)
    run_training(data, args)
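
# Example usage (scenario/goal options are those listed in the argparse help strings
# above; utils.load_data is assumed to return the expert demonstrations as
# data["x_train"] / data["y_train"]):
#   python train_il.py --scenario intersection --goal left
#   python train_il.py --scenario circularroad --goal inner --epochs 500 --lr 1e-3 --restore
# The trained policy is saved to ./policies/<scenario>_<goal>_IL.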