forked from wang-xinyu/tensorrtx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbackbone.hpp
executable file
·229 lines (201 loc) · 7.32 KB
/
backbone.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#pragma once
#include <vector>
#include <map>
#include <string>
#include "common.hpp"
/* Selects where a bottleneck block applies its stride when stride > 1:
   true  - on the first 1x1 convolution,
   false - on the 3x3 convolution.
   Set to false when using a backbone from torchvision. */
#define STRIDE_IN_1X1 true
// Identifies the ResNet variant to build. Used as the key into
// num_blocks_per_stage and to choose between basic and bottleneck blocks.
enum RESNETTYPE {
    R18  = 0,
    R34  = 1,
    R50  = 2,
    R101 = 3,
    R152 = 4
};
// Residual block count for each stage, keyed by ResNet variant.
// Entry [i] is the number of blocks in stage "res<i + 2>".
const std::map<RESNETTYPE, std::vector<int>> num_blocks_per_stage{
    { R18,  { 2, 2, 2, 2 } },
    { R34,  { 3, 4, 6, 3 } },
    { R50,  { 3, 4, 6, 3 } },
    { R101, { 3, 4, 23, 3 } },
    { R152, { 3, 8, 36, 3 } },
};
// Builds the network stem: 7x7 stride-2 convolution -> ReLU -> 3x3 stride-2 max pooling.
//
// network      : network definition the layers are added to.
// weightMap    : weights by name; "<lname>.conv1.weight" and "<lname>.conv1.bias" are read.
//                Lookup uses operator[], so a missing key is default-inserted, not reported.
// lname        : prefix of the stem's weight names.
// input        : tensor fed to the convolution.
// out_channels : number of output maps of the convolution.
// group_num    : number of convolution groups (default 1).
//
// Returns the max-pooling layer; its output 0 is the stem output.
//
// Declared inline because it is defined in a header: without it, including this
// header from more than one translation unit produces duplicate-symbol link errors.
inline ILayer* BasicStem(INetworkDefinition *network,
                         std::map<std::string, Weights>& weightMap,
                         const std::string& lname, ITensor& input,
                         int out_channels,
                         int group_num = 1) {
    // conv1: 7x7, stride 2, padding 3
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, out_channels, DimsHW{ 7, 7 },
                                                         weightMap[lname + ".conv1.weight"],
                                                         weightMap[lname + ".conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ 2, 2 });
    conv1->setPaddingNd(DimsHW{ 3, 3 });
    conv1->setNbGroups(group_num);

    auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(r1);

    // max pool: 3x3, stride 2, padding 1
    auto max_pool2d = network->addPoolingNd(*r1->getOutput(0), PoolingType::kMAX, DimsHW{ 3, 3 });
    // Check the layer before configuring it, as is done for every other layer here.
    assert(max_pool2d);
    max_pool2d->setStrideNd(DimsHW{ 2, 2 });
    max_pool2d->setPaddingNd(DimsHW{ 1, 1 });
    return max_pool2d;
}
// Builds a two-convolution residual block:
//   3x3 conv (stride) -> ReLU -> 3x3 conv (stride 1), summed with a shortcut, then ReLU.
//
// network      : network definition the layers are added to.
// weightMap    : weights by name; reads "<lname>.conv1.*", "<lname>.conv2.*" and, when a
//                projection is needed, "<lname>.shortcut.*" (".weight" / ".bias").
//                Lookup uses operator[], so a missing key is default-inserted, not reported.
// lname        : prefix of this block's weight names.
// input        : block input tensor.
// in_channels  : channel count of `input`; only compared against out_channels.
// out_channels : number of output maps of both convolutions and of the shortcut.
// stride       : stride of the first convolution and of the projection shortcut.
//
// Returns the output tensor of the final ReLU.
//
// Declared inline because it is defined in a header: without it, including this
// header from more than one translation unit produces duplicate-symbol link errors.
inline ITensor* BasicBlock(INetworkDefinition *network,
                           std::map<std::string, Weights>& weightMap,
                           const std::string& lname,
                           ITensor& input,
                           int in_channels,
                           int out_channels,
                           int stride = 1) {
    // conv1: 3x3, carries the block stride
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, out_channels, DimsHW{ 3, 3 },
                                                         weightMap[lname + ".conv1.weight"],
                                                         weightMap[lname + ".conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ stride, stride });
    conv1->setPaddingNd(DimsHW{ 1, 1 });

    auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(r1);

    // conv2: 3x3, stride 1
    IConvolutionLayer* conv2 = network->addConvolutionNd(*r1->getOutput(0), out_channels, DimsHW{ 3, 3 },
                                                         weightMap[lname + ".conv2.weight"],
                                                         weightMap[lname + ".conv2.bias"]);
    assert(conv2);
    conv2->setStrideNd(DimsHW{ 1, 1 });
    conv2->setPaddingNd(DimsHW{ 1, 1 });

    // shortcut: 1x1 projection only when the channel count changes, identity otherwise
    ITensor* shortcut_value = nullptr;
    if (in_channels != out_channels) {
        auto shortcut = network->addConvolutionNd(input, out_channels, DimsHW{ 1, 1 },
                                                  weightMap[lname + ".shortcut.weight"],
                                                  weightMap[lname + ".shortcut.bias"]);
        assert(shortcut);
        shortcut->setStrideNd(DimsHW{ stride, stride });
        shortcut_value = shortcut->getOutput(0);
    } else {
        shortcut_value = &input;
    }

    // residual sum followed by the output activation
    auto ew = network->addElementWise(*conv2->getOutput(0), *shortcut_value, ElementWiseOperation::kSUM);
    assert(ew);
    auto r3 = network->addActivation(*ew->getOutput(0), ActivationType::kRELU);
    assert(r3);
    return r3->getOutput(0);
}
// Builds a three-convolution bottleneck residual block:
//   1x1 conv -> ReLU -> 3x3 conv -> ReLU -> 1x1 conv, summed with a shortcut, then ReLU.
//
// network             : network definition the layers are added to.
// weightMap           : weights by name; reads "<lname>.conv1.*", "<lname>.conv2.*",
//                       "<lname>.conv3.*" and, when a projection is needed,
//                       "<lname>.shortcut.*" (".weight" / ".bias"). Lookup uses operator[],
//                       so a missing key is default-inserted, not reported.
// lname               : prefix of this block's weight names.
// input               : block input tensor.
// in_channels         : channel count of `input`; only compared against out_channels.
// bottleneck_channels : number of output maps of conv1 and conv2.
// out_channels        : number of output maps of conv3 and of the shortcut.
// stride              : block stride; STRIDE_IN_1X1 decides whether conv1 or conv2 applies it.
//                       The projection shortcut always uses it.
// dilation            : dilation of the 3x3 convolution; its padding is set to the same value.
// group_num           : group count applied to every convolution in the block,
//                       including the 1x1 convolutions and the shortcut.
//
// Returns the output tensor of the final ReLU.
//
// Declared inline because it is defined in a header: without it, including this
// header from more than one translation unit produces duplicate-symbol link errors.
inline ITensor* BottleneckBlock(INetworkDefinition *network,
                                std::map<std::string, Weights>& weightMap,
                                const std::string& lname,
                                ITensor& input,
                                int in_channels,
                                int bottleneck_channels,
                                int out_channels,
                                int stride = 1,
                                int dilation = 1,
                                int group_num = 1) {
    // Exactly one of the first two convolutions carries the stride.
    int stride_1x1 = STRIDE_IN_1X1 ? stride : 1;
    int stride_3x3 = STRIDE_IN_1X1 ? 1 : stride;

    // conv1: 1x1
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, bottleneck_channels, DimsHW{ 1, 1 },
                                                         weightMap[lname + ".conv1.weight"],
                                                         weightMap[lname + ".conv1.bias"]);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ stride_1x1, stride_1x1 });
    conv1->setNbGroups(group_num);

    auto r1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
    assert(r1);

    // conv2: 3x3, padding tracks the dilation
    IConvolutionLayer* conv2 = network->addConvolutionNd(*r1->getOutput(0), bottleneck_channels, DimsHW{ 3, 3 },
                                                         weightMap[lname + ".conv2.weight"],
                                                         weightMap[lname + ".conv2.bias"]);
    assert(conv2);
    conv2->setStrideNd(DimsHW{ stride_3x3, stride_3x3 });
    conv2->setPaddingNd(DimsHW{ 1 * dilation, 1 * dilation });
    conv2->setDilationNd(DimsHW{ dilation, dilation });
    conv2->setNbGroups(group_num);

    auto r2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
    assert(r2);

    // conv3: 1x1, stride 1
    IConvolutionLayer* conv3 = network->addConvolutionNd(*r2->getOutput(0), out_channels, DimsHW{ 1, 1 },
                                                         weightMap[lname + ".conv3.weight"],
                                                         weightMap[lname + ".conv3.bias"]);
    assert(conv3);
    conv3->setStrideNd(DimsHW{ 1, 1 });
    conv3->setNbGroups(group_num);

    // shortcut: 1x1 projection only when the channel count changes, identity otherwise
    ITensor* shortcut_value = nullptr;
    if (in_channels != out_channels) {
        auto shortcut = network->addConvolutionNd(input, out_channels, DimsHW{ 1, 1 },
                                                  weightMap[lname + ".shortcut.weight"],
                                                  weightMap[lname + ".shortcut.bias"]);
        assert(shortcut);
        shortcut->setStrideNd(DimsHW{ stride, stride });
        shortcut->setNbGroups(group_num);
        shortcut_value = shortcut->getOutput(0);
    } else {
        shortcut_value = &input;
    }

    // residual sum followed by the output activation
    auto ew = network->addElementWise(*conv3->getOutput(0), *shortcut_value, ElementWiseOperation::kSUM);
    assert(ew);
    auto r3 = network->addActivation(*ew->getOutput(0), ActivationType::kRELU);
    assert(r3);
    return r3->getOutput(0);
}
// Chains `stage` residual blocks into one stage.
//
// network             : network definition the layers are added to.
// weightMap           : weights by name, forwarded to each block.
// lname               : stage prefix; block i uses the prefix "<lname>.<i>".
// input               : stage input tensor.
// stage               : number of blocks to build (despite the name, a count, not a stage index).
// resnet_type         : R18 / R34 build BasicBlock, every other variant builds BottleneckBlock.
// in_channels         : channel count entering the first block; later blocks receive out_channels.
// bottleneck_channels : passed to BottleneckBlock; unused for R18 / R34.
// out_channels        : output channel count of every block.
// first_stride        : stride of block 0; all following blocks use stride 1.
// dilation            : passed to BottleneckBlock; unused for R18 / R34.
//
// Returns the output tensor of the last block, or &input when `stage` <= 0.
//
// Declared inline because it is defined in a header: without it, including this
// header from more than one translation unit produces duplicate-symbol link errors.
inline ITensor* MakeStage(INetworkDefinition *network,
                          std::map<std::string, Weights>& weightMap,
                          const std::string& lname,
                          ITensor& input,
                          int stage,
                          RESNETTYPE resnet_type,
                          int in_channels,
                          int bottleneck_channels,
                          int out_channels,
                          int first_stride = 1,
                          int dilation = 1) {
    // The block type does not change inside a stage, so decide once.
    const bool use_basic_block = (resnet_type == R18 || resnet_type == R34);

    ITensor* out = &input;
    for (int i = 0; i < stage; i++) {
        std::string layerName = lname + "." + std::to_string(i);
        int stride = i == 0 ? first_stride : 1;
        if (use_basic_block) {
            out = BasicBlock(network, weightMap, layerName, *out, in_channels, out_channels, stride);
        } else {
            out = BottleneckBlock(network, weightMap, layerName, *out,
                                  in_channels, bottleneck_channels, out_channels, stride, dilation);
        }
        // After the first block the stage operates at its output width.
        in_channels = out_channels;
    }
    return out;
}
// Builds the stem followed by stages "backbone.res2" .. "backbone.res4".
//
// network             : network definition the layers are added to.
// weightMap           : weights by name, forwarded to the stem and every stage.
// input               : network input tensor.
// resnet_type         : variant; selects the block type and the per-stage block counts.
// stem_out_channels   : output channels of the stem and input channels of the first stage.
// bottleneck_channels : bottleneck width of the first stage; doubled after every stage.
// res2_out_channels   : output channels of the first stage; doubled after every stage.
//                       Must be 64 for R18 / R34.
// res5_dilation       : must be 1 or 2 (1 for R18 / R34).
//                       NOTE(review): only three stages are built, so the stage index
//                       that would consume this value is never reached and it
//                       currently affects nothing but the assertions below.
//
// Returns the output tensor of the last stage that was built.
//
// Declared inline because it is defined in a header: without it, including this
// header from more than one translation unit produces duplicate-symbol link errors.
inline ITensor* BuildResNet(INetworkDefinition *network,
                            std::map<std::string, Weights>& weightMap,
                            ITensor& input,
                            RESNETTYPE resnet_type,
                            int stem_out_channels,
                            int bottleneck_channels,
                            int res2_out_channels,
                            int res5_dilation = 1) {
    assert(res5_dilation == 1 || res5_dilation == 2);  // "res5_dilation must be 1 or 2"
    if (resnet_type == R18 || resnet_type == R34) {
        assert(res2_out_channels == 64);  // "res2_out_channels must be 64 for R18/R34"
        assert(res5_dilation == 1);  // "res5_dilation must be 1 for R18/R34"
    }

    // Number of stages built after the stem (loop indices 0..2 -> res2..res4).
    constexpr int kNumStages = 3;
    // Loop index at which res5_dilation would apply; not reached while kNumStages == 3.
    constexpr int kRes5Index = 3;

    // stem
    auto stem = BasicStem(network, weightMap, "backbone.stem", input, stem_out_channels);
    assert(stem);
    ITensor* out = stem->getOutput(0);

    // res stages: every stage takes the previous stage's output width as its input width
    int in_channels = stem_out_channels;
    int out_channels = res2_out_channels;
    for (int i = 0; i < kNumStages; i++) {
        int dilation = (i == kRes5Index) ? res5_dilation : 1;
        // The first stage keeps resolution (the stem already downsampled); a dilated
        // res5 would keep it as well. Every other stage halves it.
        int first_stride = (i == 0 || (i == kRes5Index && dilation == 2)) ? 1 : 2;
        out = MakeStage(network, weightMap,
                        "backbone.res" + std::to_string(i + 2), *out,
                        num_blocks_per_stage.at(resnet_type)[i], resnet_type,
                        in_channels, bottleneck_channels, out_channels,
                        first_stride, dilation);
        in_channels = out_channels;
        bottleneck_channels *= 2;
        out_channels *= 2;
    }
    return out;
}