# openpose.py (forked from TreB1eN/Pytorch0.4.1_Openpose)
import cv2
import math
import time
import numpy as np
from scipy.ndimage import gaussian_filter
import os
import sys
import torch
import torch.nn.functional as F
from tensorboardX import SummaryWriter
from entity import params, JointType
from models.CocoPoseNet import CocoPoseNet, compute_loss
from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.optim import Adam
from datetime import datetime
from matplotlib import pyplot as plt
def get_time():
return (str(datetime.now())[:-10]).replace(' ','-').replace(':','-')
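# Example of the format above (illustrative): get_time() -> '2018-09-30-17-32'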
class Openpose(object):
def __init__(self, arch='posenet', weights_file=None, training = True):
self.arch = arch
if weights_file:
self.model = params['archs'][arch]()
self.model.load_state_dict(torch.load(weights_file))
else:
self.model = params['archs'][arch](params['pretrained_path'])
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
self.model = self.model.to(self.device)
if training:
from pycocotools.coco import COCO
from coco_dataset import CocoDataset
for para in self.model.base.vgg_base.parameters():
para.requires_grad = False
coco_train = COCO(os.path.join(params['coco_dir'], 'annotations/person_keypoints_train2017.json'))
coco_val = COCO(os.path.join(params['coco_dir'], 'annotations/person_keypoints_val2017.json'))
self.train_loader = DataLoader(CocoDataset(coco_train, params['insize']),
params['batch_size'],
shuffle=True,
pin_memory=False,
num_workers=params['num_workers'])
self.val_loader = DataLoader(CocoDataset(coco_val, params['insize'], mode = 'val'),
params['batch_size'],
shuffle=False,
pin_memory=False,
num_workers=params['num_workers'])
self.train_length = len(self.train_loader)
self.val_length = len(self.val_loader)
self.step = 0
self.writer = SummaryWriter(params['log_path'])
self.board_loss_every = self.train_length // params['board_loss_interval']
self.evaluate_every = self.train_length // params['eval_interval']
self.board_pred_image_every = self.train_length // params['board_pred_image_interval']
self.save_every = self.train_length // params['save_interval']
self.optimizer = Adam([
{'params' : [*self.model.parameters()][20:24], 'lr' : params['lr'] / 4},
{'params' : [*self.model.parameters()][24:], 'lr' : params['lr']}])
            # test-only values for quick debugging (uncomment to use)
# self.board_loss_every = 5
# self.evaluate_every = 5
# self.board_pred_image_every = 5
# self.save_every = 5
def board_scalars(self, key, loss, paf_log, heatmap_log):
self.writer.add_scalar('{}_loss'.format(key), loss, self.step)
for stage, (paf_loss, heatmap_loss) in enumerate(zip(paf_log, heatmap_log)):
self.writer.add_scalar('{}_paf_loss_stage{}'.format(key, stage), paf_loss, self.step)
self.writer.add_scalar('{}_heatmap_loss_stage{}'.format(key, stage), heatmap_loss, self.step)
def evaluate(self, num = 50):
self.model.eval()
count = 0
running_loss = 0.
running_paf_log = 0.
running_heatmap_log = 0.
with torch.no_grad():
for imgs, pafs, heatmaps, ignore_mask in iter(self.val_loader):
imgs, pafs, heatmaps, ignore_mask = imgs.to(self.device), pafs.to(self.device), heatmaps.to(self.device), ignore_mask.to(self.device)
pafs_ys, heatmaps_ys = self.model(imgs)
total_loss, paf_loss_log, heatmap_loss_log = compute_loss(pafs_ys, heatmaps_ys, pafs, heatmaps, ignore_mask)
running_loss += total_loss.item()
running_paf_log += paf_loss_log
running_heatmap_log += heatmap_loss_log
count += 1
if count >= num:
break
        # average over the batches actually evaluated (the loader may yield fewer than num)
        return running_loss / count, running_paf_log / count, running_heatmap_log / count
def save_state(self, val_loss, to_save_folder=False, model_only=False):
if to_save_folder:
save_path = params['work_space']/'save'
else:
save_path = params['work_space']/'model'
        save_time = get_time()  # avoid shadowing the time module
        torch.save(
            self.model.state_dict(), save_path /
            ('model_{}_val_loss:{}_step:{}.pth'.format(save_time, val_loss, self.step)))
        if not model_only:
            torch.save(
                self.optimizer.state_dict(), save_path /
                ('optimizer_{}_val_loss:{}_step:{}.pth'.format(save_time, val_loss, self.step)))
def load_state(self, fixed_str, from_save_folder=False, model_only=False):
if from_save_folder:
save_path = params['work_space']/'save'
else:
save_path = params['work_space']/'model'
self.model.load_state_dict(torch.load(save_path/'model_{}'.format(fixed_str)))
print('load model_{}'.format(fixed_str))
if not model_only:
self.optimizer.load_state_dict(torch.load(save_path/'optimizer_{}'.format(fixed_str)))
print('load optimizer_{}'.format(fixed_str))
def resume_training_load(self, from_save_folder=False):
if from_save_folder:
save_path = params['work_space']/'save'
else:
save_path = params['work_space']/'model'
sorted_files = sorted([*save_path.iterdir()], key=lambda x: os.path.getmtime(x), reverse=True)
seeking_flag = True
index = 0
while seeking_flag:
if index > len(sorted_files) - 2:
break
file_a = sorted_files[index]
file_b = sorted_files[index + 1]
if file_a.name.startswith('model'):
fix_str = file_a.name[6:]
self.step = int(fix_str.split(':')[-1].split('.')[0]) + 1
if file_b.name == ''.join(['optimizer', '_', fix_str]):
if self.step > 2000:
for para in self.model.base.vgg_base.parameters():
para.requires_grad = True
self.optimizer.add_param_group({'params' : [*self.model.base.vgg_base.parameters()], 'lr' : params['lr'] / 4})
self.load_state(fix_str, from_save_folder)
print(self.optimizer)
return
else:
index += 1
continue
elif file_a.name.startswith('optimizer'):
fix_str = file_a.name[10:]
self.step = int(fix_str.split(':')[-1].split('.')[0]) + 1
if file_b.name == ''.join(['model', '_', fix_str]):
if self.step > 2000:
for para in self.model.base.vgg_base.parameters():
para.requires_grad = True
self.optimizer.add_param_group({'params' : [*self.model.base.vgg_base.parameters()], 'lr' : params['lr'] / 4})
self.load_state(fix_str, from_save_folder)
print(self.optimizer)
return
else:
index += 1
continue
else:
index += 1
continue
        print('no available files found')
return
    def find_lr(self,
                init_value=1e-8,
                final_value=10.,
                beta=0.98,
                exploding_scale=4.,
                num=None):
        if not num:
            num = len(self.train_loader)
        mult = (final_value / init_value)**(1 / num)
        lr = init_value
        for group in self.optimizer.param_groups:  # 'group', not 'params', to avoid shadowing the global params dict
            group['lr'] = lr
self.model.train()
avg_loss = 0.
best_loss = 0.
batch_num = 0
losses = []
log_lrs = []
for i, (imgs, pafs, heatmaps, ignore_mask) in tqdm(enumerate(self.train_loader), total=num):
imgs, pafs, heatmaps, ignore_mask = imgs.to(self.device), pafs.to(self.device), heatmaps.to(self.device), ignore_mask.to(self.device)
self.optimizer.zero_grad()
batch_num += 1
pafs_ys, heatmaps_ys = self.model(imgs)
loss, _, _ = compute_loss(pafs_ys, heatmaps_ys, pafs, heatmaps, ignore_mask)
#Compute the smoothed loss
avg_loss = beta * avg_loss + (1 - beta) * loss.item()
self.writer.add_scalar('avg_loss', avg_loss, batch_num)
smoothed_loss = avg_loss / (1 - beta**batch_num)
            self.writer.add_scalar('smoothed_loss', smoothed_loss, batch_num)
#Stop if the loss is exploding
            if batch_num > 1 and smoothed_loss > exploding_scale * best_loss:
                print('exited with best_loss at {}'.format(best_loss))
plt.plot(log_lrs[10:-5], losses[10:-5])
return log_lrs, losses
#Record the best loss
if smoothed_loss < best_loss or batch_num == 1:
best_loss = smoothed_loss
#Store the values
losses.append(smoothed_loss)
log_lrs.append(math.log10(lr))
self.writer.add_scalar('log_lr', math.log10(lr), batch_num)
            # Do the SGD step
            loss.backward()
            self.optimizer.step()
            # Update the lr for the next step
            lr *= mult
            for group in self.optimizer.param_groups:
                group['lr'] = lr
            if batch_num >= num:
                plt.plot(log_lrs[10:-5], losses[10:-5])
                return log_lrs, losses
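    # Usage sketch for the LR range test (assuming an instance `op` built with
    # training=True):
    #     log_lrs, losses = op.find_lr()
    #     plt.plot(log_lrs[10:-5], losses[10:-5])
    # then pick a learning rate just before the loss curve reaches its minimum.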
    def lr_schedule(self):
        for group in self.optimizer.param_groups:
            group['lr'] /= 10.
        print(self.optimizer)
def train(self, resume = False):
running_loss = 0.
running_paf_log = 0.
running_heatmap_log = 0.
if resume:
self.resume_training_load()
for epoch in range(60):
for imgs, pafs, heatmaps, ignore_mask in tqdm(iter(self.train_loader)):
if self.step == 2000:
for para in self.model.base.vgg_base.parameters():
para.requires_grad = True
self.optimizer.add_param_group({'params' : [*self.model.base.vgg_base.parameters()], 'lr' : params['lr'] / 4})
if self.step == 100000 or self.step == 200000:
self.lr_schedule()
imgs, pafs, heatmaps, ignore_mask = imgs.to(self.device), pafs.to(self.device), heatmaps.to(self.device), ignore_mask.to(self.device)
self.optimizer.zero_grad()
pafs_ys, heatmaps_ys = self.model(imgs)
total_loss, paf_loss_log, heatmap_loss_log = compute_loss(pafs_ys, heatmaps_ys, pafs, heatmaps, ignore_mask)
total_loss.backward()
self.optimizer.step()
running_loss += total_loss.item()
running_paf_log += paf_loss_log
running_heatmap_log += heatmap_loss_log
if (self.step % self.board_loss_every == 0) & (self.step != 0):
self.board_scalars('train',
running_loss / self.board_loss_every,
running_paf_log / self.board_loss_every,
running_heatmap_log / self.board_loss_every)
running_loss = 0.
running_paf_log = 0.
running_heatmap_log = 0.
if (self.step % self.evaluate_every == 0) & (self.step != 0):
val_loss, paf_loss_val_log, heatmap_loss_val_log = self.evaluate(num = params['eva_num'])
self.model.train()
self.board_scalars('val', val_loss, paf_loss_val_log, heatmap_loss_val_log)
if (self.step % self.board_pred_image_every == 0) & (self.step != 0):
self.model.eval()
with torch.no_grad():
for i in range(20):
img_id = self.val_loader.dataset.imgIds[i]
img_path = os.path.join(params['coco_dir'], 'val2017', self.val_loader.dataset.coco.loadImgs([img_id])[0]['file_name'])
img = cv2.imread(img_path)
# inference
poses, _ = self.detect(img)
# draw and save image
img = draw_person_pose(img, poses)
img = torch.tensor(img.transpose(2,0,1))
self.writer.add_image('pred_image_{}'.format(i), img, global_step=self.step)
self.model.train()
                if (self.step % self.save_every == 0) & (self.step != 0):
                    self.save_state(val_loss)  # assumes evaluate_every has fired at least once, so val_loss is defined
self.step += 1
                if self.step > 300000:
                    return  # 'break' would only exit the inner loop and training would resume next epoch
def pad_image(self, img, stride, pad_value):
h, w, _ = img.shape
pad = [0] * 2
pad[0] = (stride - (h % stride)) % stride # down
pad[1] = (stride - (w % stride)) % stride # right
img_padded = np.zeros((h+pad[0], w+pad[1], 3), 'uint8') + pad_value
img_padded[:h, :w, :] = img.copy()
return img_padded, pad
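    # Worked example (illustrative): a 481x640 image with stride=8 gives
    # pad = [7, 0], so the padded image is 488x640 and both sides are
    # divisible by the network stride.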
def compute_optimal_size(self, orig_img, img_size, stride=8):
"""画像の幅と高さがstrideの倍数になるように調節する"""
orig_img_h, orig_img_w, _ = orig_img.shape
aspect = orig_img_h / orig_img_w
if orig_img_h < orig_img_w:
img_h = img_size
img_w = np.round(img_size / aspect).astype(int)
surplus = img_w % stride
if surplus != 0:
img_w += stride - surplus
else:
img_w = img_size
img_h = np.round(img_size * aspect).astype(int)
surplus = img_h % stride
if surplus != 0:
img_h += stride - surplus
return (img_w, img_h)
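    # Worked example (illustrative): a 480x640 (h x w) image with img_size=368
    # has aspect 0.75, so img_h = 368 and img_w = round(368 / 0.75) = 491,
    # which is then rounded up to 496, the next multiple of stride=8.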
def compute_peaks_from_heatmaps(self, heatmaps):
"""all_peaks: shape = [N, 5], column = (jointtype, x, y, score, index)"""
#heatmaps.shape : (19, 584, 584)
#heatmaps[-1]是背景,训练时有用,推断时没用,这里去掉
heatmaps = heatmaps[:-1]
all_peaks = []
peak_counter = 0
        for i, heatmap in enumerate(heatmaps):
heatmap = gaussian_filter(heatmap, sigma=params['gaussian_sigma'])
            '''
            Compare this with GPU-based implementations: gaussian_filter simply
            convolves each single-channel heatmap with a Gaussian kernel. Since
            the network is trained to regress Gaussian blobs centred on the
            target points, smoothing with the same kind of kernel is a sensible
            way to find the coordinates closest to the true targets.
            '''
map_left = np.zeros(heatmap.shape)
map_right = np.zeros(heatmap.shape)
map_top = np.zeros(heatmap.shape)
map_bottom = np.zeros(heatmap.shape)
            '''
            As far as I can tell, left/top and right/bottom are actually swapped
            here, but this does not affect the final result.
            '''
map_left[1:, :] = heatmap[:-1, :]
map_right[:-1, :] = heatmap[1:, :]
map_top[:, 1:] = heatmap[:, :-1]
map_bottom[:, :-1] = heatmap[:, 1:]
peaks_binary = np.logical_and.reduce((
heatmap > params['heatmap_peak_thresh'],
heatmap > map_left,
heatmap > map_right,
heatmap > map_top,
heatmap > map_bottom,
))
            '''
            A neat trick: this finds every point on the heatmap that satisfies both conditions:
            1. its value exceeds params['heatmap_peak_thresh'] (default 0.05), and
            2. its value is greater than those of its four neighbours (up, down, left, right).
            Points meeting both conditions become True, all others False.
            '''
            peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])  # array of peak coordinates: [(x, y), (x, y), ...]
            '''np.nonzero returns coordinates as [y, x]; here they are reordered to [x, y]'''
peaks_with_score = [(i,) + peak_pos + (heatmap[peak_pos[1], peak_pos[0]],) for peak_pos in peaks]
            '''
            [(0, 387, 47, 0.050346997),
             (0, 388, 47, 0.050751492),
             (0, 389, 47, 0.051055912),
             .....]
            (joint index, x coordinate, y coordinate, heatmap value)
            '''
peaks_id = range(peak_counter, peak_counter + len(peaks_with_score))
peaks_with_score_and_id = [peaks_with_score[i] + (peaks_id[i], ) for i in range(len(peaks_id))]
            '''
            [(0, 387, 47, 0.050346997, 0),
             (0, 388, 47, 0.050751492, 1),
             (0, 389, 47, 0.051055912, 2),
             (0, 390, 47, 0.051255725, 3),
             ......]
            This step also appends a running id to each peak.
            '''
peak_counter += len(peaks_with_score_and_id)
all_peaks.append(peaks_with_score_and_id)
all_peaks = np.array([peak for peaks_each_category in all_peaks for peak in peaks_each_category])
        '''A nice trick: a two-level list comprehension flattens the nested list.'''
return all_peaks
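    # Toy example of the peak test above (illustrative): for
    #     heatmap = np.array([[0.0, 0.1, 0.0],
    #                         [0.1, 0.9, 0.1],
    #                         [0.0, 0.1, 0.0]])
    # only the centre value 0.9 exceeds both the threshold and all four shifted
    # maps, so peaks_binary is True only at (x=1, y=1).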
def compute_candidate_connections(self, paf, cand_a, cand_b, img_len, params):
candidate_connections = []
for joint_a in cand_a:
            for joint_b in cand_b:  # each joint is an (x, y) coordinate
vector = joint_b[:2] - joint_a[:2]
norm = np.linalg.norm(vector)
if norm == 0:
continue
ys = np.linspace(joint_a[1], joint_b[1], num=params['n_integ_points'])
xs = np.linspace(joint_a[0], joint_b[0], num=params['n_integ_points'])
integ_points = np.stack([ys, xs]).T.round().astype('i')
                '''
                Coordinate points on the line segment joining joint_a and joint_b.
                params['n_integ_points'] = 10
                integ_points =
                array([[ 32, 242],
                       [ 36, 241],
                       [ 39, 240],
                       [ 43, 239],
                       [ 47, 238],
                       [ 50, 236],
                       [ 54, 235],
                       [ 58, 234],
                       [ 61, 233],
                       [ 65, 232]], dtype=int32)
                Obtained by sampling 10 points along the segment from joint_a to
                joint_b and rounding to integer coordinates. Note: given
                np.stack([ys, xs]).T, each row is [y, x], which matches the
                paf[...] indexing below.
                '''
paf_in_edge = np.hstack([paf[0][np.hsplit(integ_points, 2)], paf[1][np.hsplit(integ_points, 2)]])
                '''
                paf_in_edge.shape : (10, 2)
                paf_in_edge holds the predicted PAF vectors at these 10 sample points.
                '''
unit_vector = vector / norm
inner_products = np.dot(paf_in_edge, unit_vector)
integ_value = inner_products.sum() / len(inner_products)
                '''
                The three lines above correspond to Eq. 10 in the paper:
                the line integral is approximated by averaging over sampled points.
                '''
                '''
                Penalize the connection when the vector is longer than a reference length.
                params['limb_length_ratio'] = 1
                params['length_penalty_value'] = 1
                img_len = width of the original image
                '''
integ_value_with_dist_prior = integ_value + min(params['limb_length_ratio'] * img_len / norm - params['length_penalty_value'], 0)
                '''
                params['inner_product_thresh'] = 0.05
                params['n_integ_points_thresh'] = 8
                The condition below means: a candidate connection is recorded only
                when more than 8 of the 10 sample points have a PAF vector whose
                dot product with the unit vector from joint_a to joint_b exceeds
                0.05, and the mean over the 10 points (the approximate integral)
                is positive.
                '''
n_valid_points = sum(inner_products > params['inner_product_thresh'])
if n_valid_points > params['n_integ_points_thresh'] and integ_value_with_dist_prior > 0:
candidate_connections.append([int(joint_a[3]), int(joint_b[3]), integ_value_with_dist_prior])
                    '''
                    What gets recorded are the indices of joint_a and joint_b
                    within all_peaks, plus the integral value. joint_a and joint_b
                    are enumerated from cand_a and cand_b, which are themselves
                    slices of all_peaks.
                    '''
candidate_connections = sorted(candidate_connections, key=lambda x: x[2], reverse=True)
        '''Sort all candidate connections by integral value, highest first.'''
return candidate_connections
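    # For reference, the score above approximates Eq. 10 of the OpenPose paper:
    #     E = \int_0^1 PAF(p(u)) . (d_b - d_a) / ||d_b - d_a|| du
    # with p(u) interpolating between joints a and b; the integral is replaced
    # by a mean over params['n_integ_points'] samples.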
    '''
    len(all_connections) = 19: one entry per limb type, each entry an array of
    all candidate connections for that limb. For example, all_connections[2] =
    array([[47.        , 51.        ,  0.86362792],
           [46.        , 50.        ,  0.71809054],
           [45.        , 49.        ,  0.59873392],
           [44.        , 48.        ,  0.3711632 ]])
    contains 4 connections, which may belong to different people. The final
    step, grouping_key_points, then assembles each person's limbs.
    '''
def compute_connections(self, pafs, all_peaks, img_len, params):
all_connections = []
for i in range(len(params['limbs_point'])):
            '''
            params['limbs_point']:
            [[<JointType.Neck: 1>, <JointType.RightWaist: 8>],
            [<JointType.RightWaist: 8>, <JointType.RightKnee: 9>],
            [<JointType.RightKnee: 9>, <JointType.RightFoot: 10>],
            [<JointType.Neck: 1>, <JointType.LeftWaist: 11>],
            [<JointType.LeftWaist: 11>, <JointType.LeftKnee: 12>],
            [<JointType.LeftKnee: 12>, <JointType.LeftFoot: 13>],
            [<JointType.Neck: 1>, <JointType.RightShoulder: 2>],
            [<JointType.RightShoulder: 2>, <JointType.RightElbow: 3>],
            [<JointType.RightElbow: 3>, <JointType.RightHand: 4>],
            [<JointType.RightShoulder: 2>, <JointType.RightEar: 16>],
            [<JointType.Neck: 1>, <JointType.LeftShoulder: 5>],
            [<JointType.LeftShoulder: 5>, <JointType.LeftElbow: 6>],
            [<JointType.LeftElbow: 6>, <JointType.LeftHand: 7>],
            [<JointType.LeftShoulder: 5>, <JointType.LeftEar: 17>],
            [<JointType.Neck: 1>, <JointType.Nose: 0>],
            [<JointType.Nose: 0>, <JointType.RightEye: 14>],
            [<JointType.Nose: 0>, <JointType.LeftEye: 15>],
            [<JointType.RightEye: 14>, <JointType.RightEar: 16>],
            [<JointType.LeftEye: 15>, <JointType.LeftEar: 17>]]
            These are the limb types, i.e. the kinds of PAF: 19 in total.
            Think of this outer loop as running one task per limb type: the
            first pass finds all candidate connections from Neck to RightWaist,
            the next from RightWaist to RightKnee, and so on for all 19 tasks.
            '''
paf_index = [i*2, i*2 + 1]
paf = pafs[paf_index] # shape: (2, 320, 320)
            '''
            Why does the paf here have 2 channels? Because a PAF is a vector
            field: within the image region covered by the limb, every point
            carries a 2-D vector. The ground-truth PAF is the unit vector
            pointing from joint_a to joint_b (so predictions stay close to it).
            '''
limb_point = params['limbs_point'][i] # example: [<JointType.Neck: 1>, <JointType.RightWaist: 8>]
cand_a = all_peaks[all_peaks[:, 0] == limb_point[0]][:, 1:]
cand_b = all_peaks[all_peaks[:, 0] == limb_point[1]][:, 1:]
            '''
            all_peaks[:, 0] is the joint type each peak belongs to.
            cand_a ("candidate a") holds [x, y, value, idx] for every peak of
            the first joint type. Since an image may contain several people,
            the peaks for one joint type will likely belong to different people.
            The next step uses the PAF to decide which pairs of joints are
            plausibly connected; note that each task searches for connections
            only between the two joint types of the current limb.
            '''
if len(cand_a) > 0 and len(cand_b) > 0:
candidate_connections = self.compute_candidate_connections(paf, cand_a, cand_b, img_len, params)
                '''
                candidate_connections:
                [[9, 42, 0.8681351658332168],
                [8, 41, 0.8360657979306498],
                [10, 43, 0.7184696600989704],
                [7, 40, 0.619533988669367],
                [6, 39, 0.25027479198405156]]
                format: [index_a, index_b, value]
                '''
connections = np.zeros((0, 3))
                '''
                connections : array([], shape=(0, 3), dtype=float64)
                A handy trick: create a 0-row, 3-column array as a template for
                the rows stacked onto it below.
                '''
for index_a, index_b, score in candidate_connections:
if index_a not in connections[:, 0] and index_b not in connections[:, 1]:
                        '''
                        This if condition suppresses duplicate connections.
                        index_a is the start point, index_b the end point.
                        Within one task, a peak (a detected joint) can serve as
                        a start or end point at most once: the candidates were
                        sorted by integral value, so the highest-scoring
                        connection is taken first, and any later candidate that
                        reuses an endpoint is dropped.
                        '''
connections = np.vstack([connections, [index_a, index_b, score]])
if len(connections) >= min(len(cand_a), len(cand_b)):
break
all_connections.append(connections)
else:
all_connections.append(np.zeros((0, 3)))
return all_connections
def grouping_key_points(self, all_connections, candidate_peaks, params):
subsets = -1 * np.ones((0, 20))
        '''
        subsets is the array recording the grouping result. Why 20 columns?
        The first 18 slots record the joints: one per joint type, each holding
        the index of a peak in candidate_peaks. The second-to-last column holds
        the total score of the subset, and the last column counts how many
        joints the subset has collected so far. Not all 18 joints need to be
        found; missing ones keep the default value -1. Each subset represents
        one possibly detected person.
        '''
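        # Illustrative example: a subset that so far contains only a neck
        # (peak id 4) and a right waist (peak id 9) would look like
        #     [-1, 4, -1, -1, -1, -1, -1, -1, 9, -1, ..., -1, total_score, 2]
        # since JointType.Neck == 1 and JointType.RightWaist == 8 index the row.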
for l, connections in enumerate(all_connections):
joint_a, joint_b = params['limbs_point'][l]
            '''
            The 19 limb types are processed in the order of
            params['limbs_point'][l], reading the candidate connections predicted
            earlier, e.g. (<JointType.Neck: 1>, <JointType.RightWaist: 8>).
            Processing in this fixed order never revisits a limb type, so no
            duplication can arise.
            '''
for ind_a, ind_b, score in connections[:, :3]:
                '''
                Inner loop: for one limb type, the previous step produced several
                connections, which ought to belong to different people (one person
                cannot have two copies of the same limb). The if-branches below
                handle these cases, depending on where the two endpoints of the
                current connection already belong:
                1. If neither endpoint belongs to any existing subset
                   (joint_found_cnt == 0), create a new subset from this connection.
                2. If exactly one endpoint belongs to an existing subset and the
                   other belongs nowhere, simply add the connection (limb) to that
                   subset; this is the easiest case.
                3. If the endpoints are found in two existing subsets, then:
                   a. if the two subsets share no duplicate joints, merge them;
                   b. if they do share joints, add the connection to both subsets
                      (the lower-scoring subset is deleted at the end anyway).
                4. If three or more subsets contain either endpoint, just pass.
                '''
ind_a, ind_b = int(ind_a), int(ind_b)
joint_found_cnt = 0
joint_found_subset_index = [-1, -1]
for subset_ind, subset in enumerate(subsets):
                    # if some subset already contains a joint of this connection
if subset[joint_a] == ind_a or subset[joint_b] == ind_b:
joint_found_subset_index[joint_found_cnt] = subset_ind
joint_found_cnt += 1
                # The loop above scans all existing subsets to count how many of
                # them share an endpoint with the current connection.
# print('joint_found_cnt : {}'.format(joint_found_cnt))
# print('joint_a : {}, joint_b : {}'.format(joint_a, joint_b))
# print('ind_a : {}, ind_b : {}'.format(ind_a, ind_b))
if joint_found_cnt == 1:
                    # exactly one subset overlaps: it already holds one of this connection's joints
found_subset = subsets[joint_found_subset_index[0]]
                    # Except for the shoulder->ear connection, only the start point can match.
                    # For shoulder->ear, if the end point matches, the facial bones have already
                    # been detected, so no processing is needed.
if found_subset[joint_b] != ind_b:
found_subset[joint_b] = ind_b
found_subset[-1] += 1 # increment joint count
                        found_subset[-2] += candidate_peaks[ind_b, 3] + score  # add joint b's score and the connection's integral value
                elif joint_found_cnt == 2:  # two subsets overlap
                    # subset1 holds joint1 and subset2 holds joint2
                    # (cannot happen for the shoulder->ear combination)
                    # print('limb {}: 2 subsets have any joint'.format(l))
found_subset_1 = subsets[joint_found_subset_index[0]]
found_subset_2 = subsets[joint_found_subset_index[1]]
membership = ((found_subset_1 >= 0).astype(int) + (found_subset_2 >= 0).astype(int))[:-2]
if not np.any(membership == 2): # merge two subsets when no duplication
found_subset_1[:-2] += found_subset_2[:-2] + 1 # default is -1
found_subset_1[-2:] += found_subset_2[-2:]
                        found_subset_1[-2] += score
                        # add only the connection's integral value (the joint scores were all
                        # added during the merge); the original found_subset_1[-2:] += score
                        # also added score to the joint count, which was unnecessary
subsets = np.delete(subsets, joint_found_subset_index[1], axis=0)
else:
if found_subset_1[joint_a] == -1:
found_subset_1[joint_a] = ind_a
found_subset_1[-1] += 1
found_subset_1[-2] += candidate_peaks[ind_a, 3] + score
elif found_subset_1[joint_b] == -1:
found_subset_1[joint_b] = ind_b
found_subset_1[-1] += 1
found_subset_1[-2] += candidate_peaks[ind_b, 3] + score
if found_subset_2[joint_a] == -1:
found_subset_2[joint_a] = ind_a
found_subset_2[-1] += 1
found_subset_2[-2] += candidate_peaks[ind_a, 3] + score
elif found_subset_2[joint_b] == -1:
found_subset_2[joint_b] = ind_b
found_subset_2[-1] += 1
found_subset_2[-2] += candidate_peaks[ind_b, 3] + score
                elif joint_found_cnt == 0 and l != 9 and l != 13:
                    # no existing subset matched: create a new one;
                    # shoulder-ear connections never seed a new group
row = -1 * np.ones(20)
row[joint_a] = ind_a
row[joint_b] = ind_b
row[-1] = 2
row[-2] = sum(candidate_peaks[[ind_a, ind_b], 3]) + score
subsets = np.vstack([subsets, row])
elif joint_found_cnt >= 3:
pass
# delete low score subsets
keep = np.logical_and(subsets[:, -1] >= params['n_subset_limbs_thresh'], subsets[:, -2]/subsets[:, -1] >= params['subset_score_thresh'])
# params['n_subset_limbs_thresh'] = 3
# params['subset_score_thresh'] = 0.2
subsets = subsets[keep]
return subsets
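    # With the default thresholds above, a surviving subset must have at least
    # 3 joints and an average score of at least 0.2; each surviving row is one
    # detected person.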
def subsets_to_pose_array(self, subsets, all_peaks):
        '''
        Nothing fancy here: for each subset, look up the coordinates in
        all_peaks by peak id and assemble the output pose array.
        '''
person_pose_array = []
for subset in subsets:
joints = []
for joint_index in subset[:18].astype('i'):
if joint_index >= 0:
joint = all_peaks[joint_index][1:3].tolist()
joint.append(2)
joints.append(joint)
else:
joints.append([0, 0, 0])
person_pose_array.append(np.array(joints))
person_pose_array = np.array(person_pose_array)
return person_pose_array
def compute_limbs_length(self, joints):
limbs = []
limbs_len = np.zeros(len(params["limbs_point"]))
for i, joint_indices in enumerate(params["limbs_point"]):
if joints[joint_indices[0]] is not None and joints[joint_indices[1]] is not None:
limbs.append([joints[joint_indices[0]], joints[joint_indices[1]]])
limbs_len[i] = np.linalg.norm(joints[joint_indices[1]][:-1] - joints[joint_indices[0]][:-1])
else:
limbs.append(None)
return limbs_len, limbs
def compute_unit_length(self, limbs_len):
unit_length = 0
        base_limbs_len = limbs_len[[14, 3, 0, 13, 9]]  # length ratios of (neck-nose, neck-left waist, neck-right waist, shoulder-left ear, shoulder-right ear); if any of these exist, they take priority when computing the unit length
non_zero_limbs_len = base_limbs_len > 0
if len(np.nonzero(non_zero_limbs_len)[0]) > 0:
limbs_len_ratio = np.array([0.85, 2.2, 2.2, 0.85, 0.85])
unit_length = np.sum(base_limbs_len[non_zero_limbs_len] / limbs_len_ratio[non_zero_limbs_len]) / len(np.nonzero(non_zero_limbs_len)[0])
else:
limbs_len_ratio = np.array([2.2, 1.7, 1.7, 2.2, 1.7, 1.7, 0.6, 0.93, 0.65, 0.85, 0.6, 0.93, 0.65, 0.85, 1, 0.2, 0.2, 0.25, 0.25])
non_zero_limbs_len = limbs_len > 0
unit_length = np.sum(limbs_len[non_zero_limbs_len] / limbs_len_ratio[non_zero_limbs_len]) / len(np.nonzero(non_zero_limbs_len)[0])
return unit_length
def get_unit_length(self, person_pose):
limbs_length, limbs = self.compute_limbs_length(person_pose)
unit_length = self.compute_unit_length(limbs_length)
return unit_length
def crop_around_keypoint(self, img, keypoint, crop_size):
x, y = keypoint
left = int(x - crop_size)
top = int(y - crop_size)
right = int(x + crop_size)
bottom = int(y + crop_size)
bbox = (left, top, right, bottom)
cropped_img = self.crop_image(img, bbox)
return cropped_img, bbox
def crop_person(self, img, person_pose, unit_length):
top_joint_priority = [4, 5, 6, 12, 16, 7, 13, 17, 8, 10, 14, 9, 11, 15, 2, 3, 0, 1, sys.maxsize]
bottom_joint_priority = [9, 6, 7, 14, 16, 8, 15, 17, 4, 2, 0, 5, 3, 1, 10, 11, 12, 13, sys.maxsize]
top_joint_index = len(top_joint_priority) - 1
bottom_joint_index = len(bottom_joint_priority) - 1
left_joint_index = 0
right_joint_index = 0
top_pos = sys.maxsize
bottom_pos = 0
left_pos = sys.maxsize
right_pos = 0
for i, joint in enumerate(person_pose):
if joint[2] > 0:
if top_joint_priority[i] < top_joint_priority[top_joint_index]:
top_joint_index = i
elif bottom_joint_priority[i] < bottom_joint_priority[bottom_joint_index]:
bottom_joint_index = i
if joint[1] < top_pos:
top_pos = joint[1]
elif joint[1] > bottom_pos:
bottom_pos = joint[1]
if joint[0] < left_pos:
left_pos = joint[0]
left_joint_index = i
elif joint[0] > right_pos:
right_pos = joint[0]
right_joint_index = i
        top_padding_ratio = [0.9, 1.9, 1.9, 2.9, 3.7, 1.9, 2.9, 3.7, 4.0, 5.5, 7.0, 4.0, 5.5, 7.0, 0.7, 0.8, 0.7, 0.8]
        bottom_padding_ratio = [6.9, 5.9, 5.9, 4.9, 4.1, 5.9, 4.9, 4.1, 3.8, 2.3, 0.8, 3.8, 2.3, 0.8, 7.1, 7.0, 7.1, 7.0]
        left = (left_pos - 0.3 * unit_length).astype(int)
        right = (right_pos + 0.3 * unit_length).astype(int)
        top = (top_pos - top_padding_ratio[top_joint_index] * unit_length).astype(int)
        bottom = (bottom_pos + bottom_padding_ratio[bottom_joint_index] * unit_length).astype(int)
bbox = (left, top, right, bottom)
cropped_img = self.crop_image(img, bbox)
return cropped_img, bbox
def crop_face(self, img, person_pose, unit_length):
face_size = unit_length
face_img = None
bbox = None
# if have nose
if person_pose[JointType.Nose][2] > 0:
nose_pos = person_pose[JointType.Nose][:2]
face_top = int(nose_pos[1] - face_size * 1.2)
face_bottom = int(nose_pos[1] + face_size * 0.8)
face_left = int(nose_pos[0] - face_size)
face_right = int(nose_pos[0] + face_size)
bbox = (face_left, face_top, face_right, face_bottom)
face_img = self.crop_image(img, bbox)
return face_img, bbox
def crop_hands(self, img, person_pose, unit_length):
hands = {
"left": None,
"right": None
}
if person_pose[JointType.LeftHand][2] > 0:
crop_center = person_pose[JointType.LeftHand][:-1]
if person_pose[JointType.LeftElbow][2] > 0:
direction_vec = person_pose[JointType.LeftHand][:-1] - person_pose[JointType.LeftElbow][:-1]
crop_center += (0.3 * direction_vec).astype(crop_center.dtype)
hand_img, bbox = self.crop_around_keypoint(img, crop_center, unit_length * 0.95)
hands["left"] = {
"img": hand_img,
"bbox": bbox
}
if person_pose[JointType.RightHand][2] > 0:
crop_center = person_pose[JointType.RightHand][:-1]
if person_pose[JointType.RightElbow][2] > 0:
direction_vec = person_pose[JointType.RightHand][:-1] - person_pose[JointType.RightElbow][:-1]
crop_center += (0.3 * direction_vec).astype(crop_center.dtype)
hand_img, bbox = self.crop_around_keypoint(img, crop_center, unit_length * 0.95)
hands["right"] = {
"img": hand_img,
"bbox": bbox
}
return hands
def crop_image(self, img, bbox):
left, top, right, bottom = bbox
img_h, img_w, img_ch = img.shape
box_h = bottom - top
box_w = right - left
crop_left = max(0, left)
crop_top = max(0, top)
crop_right = min(img_w, right)
crop_bottom = min(img_h, bottom)
crop_h = crop_bottom - crop_top
crop_w = crop_right - crop_left
cropped_img = img[crop_top:crop_bottom, crop_left:crop_right]
bias_x = bias_y = 0
if left < crop_left:
bias_x = crop_left - left
if top < crop_top:
bias_y = crop_top - top
# pad
padded_img = np.zeros((box_h, box_w, img_ch), dtype=np.uint8)
padded_img[bias_y:bias_y+crop_h, bias_x:bias_x+crop_w] = cropped_img
return padded_img
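    # Worked example (illustrative): bbox = (-10, 5, 90, 105) on a 100x100 image
    # crops the in-image region (0, 5)-(90, 100) and pastes it at offset
    # (bias_x=10, bias_y=0) inside a zero-filled 100x100 canvas, so areas that
    # fall outside the image stay black.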
def preprocess(self, img):
x_data = img.astype('f')
x_data /= 255
x_data -= 0.5
x_data = x_data.transpose(2, 0, 1)[None]
return x_data
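    # After preprocess, the input is a 1 x C x H x W float array with values
    # scaled to [-0.5, 0.5].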
def detect_precise(self, orig_img):
orig_img_h, orig_img_w, _ = orig_img.shape
pafs_sum = 0
heatmaps_sum = 0
interpolation = cv2.INTER_CUBIC
for scale in params['inference_scales']:
            # TTA: multi-scale testing, scale in [0.5, 1, 1.5, 2]
            multiplier = scale * params['inference_img_size'] / min(orig_img.shape[:2])
            # derive the resize factor from the scale and the actual input image size, then resize
            img = cv2.resize(orig_img, (math.ceil(orig_img_w*multiplier), math.ceil(orig_img_h*multiplier)), interpolation=interpolation)
            # bbox = (params['inference_img_size'], max(params['inference_img_size'], img.shape[1]))
            # this bbox appears unused and could probably be removed
            padded_img, pad = self.pad_image(img, params['downscale'], (104, 117, 123))
            # the network's max-pooling downscales by 8x, so pad to a multiple of 8;
            # (104, 117, 123) is presumably the mean of the input dataset?
x_data = self.preprocess(padded_img)
x_data = torch.tensor(x_data).to(self.device)
x_data.requires_grad = False
with torch.no_grad():
                h1s, h2s = self.model(x_data)  # the model outputs 6 stages of same-sized features; all are used for training, but only the last stage is used at inference
tmp_paf = h1s[-1][0].cpu().numpy().transpose(1, 2, 0)
tmp_heatmap = h2s[-1][0].cpu().numpy().transpose(1, 2, 0)
p_h, p_w = padded_img.shape[:2]
tmp_paf = cv2.resize(tmp_paf, (p_w, p_h), interpolation=interpolation)
            # first, the paf is resized to the padded image size
            tmp_paf = tmp_paf[:p_h-pad[0], :p_w-pad[1], :]
            # then the padding is removed
            pafs_sum += cv2.resize(tmp_paf, (orig_img_w, orig_img_h), interpolation=interpolation)
            # and finally it is resized back to the original input image size
            tmp_heatmap = cv2.resize(tmp_heatmap, (0, 0), fx=params['downscale'], fy=params['downscale'], interpolation=interpolation)
            tmp_heatmap = tmp_heatmap[:padded_img.shape[0]-pad[0], :padded_img.shape[1]-pad[1], :]
            heatmaps_sum += cv2.resize(tmp_heatmap, (orig_img_w, orig_img_h), interpolation=interpolation)
            # the heatmaps are handled the same way as the pafs
        # after accumulating over all scales, averaging pafs_sum and heatmaps_sum
        # gives the final TTA output features
self.pafs = (pafs_sum / len(params['inference_scales'])).transpose(2, 0, 1)
self.heatmaps = (heatmaps_sum / len(params['inference_scales'])).transpose(2, 0, 1)
self.all_peaks = self.compute_peaks_from_heatmaps(self.heatmaps)
if len(self.all_peaks) == 0:
return np.empty((0, len(JointType), 3)), np.empty(0)
all_connections = self.compute_connections(self.pafs, self.all_peaks, orig_img_w, params)
subsets = self.grouping_key_points(all_connections, self.all_peaks, params)
poses = self.subsets_to_pose_array(subsets, self.all_peaks)
scores = subsets[:, -2]
return poses, scores
def detect(self, orig_img, precise = False):
orig_img = orig_img.copy()
if precise:
return self.detect_precise(orig_img)
orig_img_h, orig_img_w, _ = orig_img.shape
input_w, input_h = self.compute_optimal_size(orig_img, params['inference_img_size'])
map_w, map_h = self.compute_optimal_size(orig_img, params['heatmap_size'])
resized_image = cv2.resize(orig_img, (input_w, input_h))
x_data = self.preprocess(resized_image)
x_data = torch.tensor(x_data).to(self.device)
x_data.requires_grad = False
with torch.no_grad():
h1s, h2s = self.model(x_data)
pafs = F.interpolate(h1s[-1], (map_h, map_w), mode='bilinear', align_corners=True).cpu().numpy()[0]
heatmaps = F.interpolate(h2s[-1], (map_h, map_w), mode='bilinear', align_corners=True).cpu().numpy()[0]
all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
if len(all_peaks) == 0:
return np.empty((0, len(JointType), 3)), np.empty(0)
all_connections = self.compute_connections(pafs, all_peaks, map_w, params)
subsets = self.grouping_key_points(all_connections, all_peaks, params)
all_peaks[:, 1] *= orig_img_w / map_w
all_peaks[:, 2] *= orig_img_h / map_h
poses = self.subsets_to_pose_array(subsets, all_peaks)
scores = subsets[:, -2]
return poses, scores
def draw_person_pose(orig_img, poses):
orig_img = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
if len(poses) == 0:
return orig_img
limb_colors = [
[0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255],
        [0, 85, 255], [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0],
        [255, 0, 85], [170, 255, 0], [85, 255, 0], [170, 0, 255], [0, 0, 255],
[0, 0, 255], [255, 0, 255], [170, 0, 255], [255, 0, 170],
]
joint_colors = [
[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
[85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
[0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
[255, 0, 255], [255, 0, 170], [255, 0, 85]]
canvas = orig_img.copy()
# limbs
for pose in poses.round().astype('i'):
for i, (limb, color) in enumerate(zip(params['limbs_point'], limb_colors)):
if i != 9 and i != 13: # don't show ear-shoulder connection
limb_ind = np.array(limb)
if np.all(pose[limb_ind][:, 2] != 0):
joint1, joint2 = pose[limb_ind][:, :2]
cv2.line(canvas, tuple(joint1), tuple(joint2), color, 2)
# joints
for pose in poses.round().astype('i'):
for i, ((x, y, v), color) in enumerate(zip(pose, joint_colors)):
if v != 0:
cv2.circle(canvas, (x, y), 3, color, -1)
return canvas
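
# Minimal inference sketch (assumptions: the weights path below is hypothetical
# and 'sample.jpg' is any test image on disk):
if __name__ == '__main__':
    openpose = Openpose(weights_file='work_space/model/posenet.pth', training=False)
    img = cv2.imread('sample.jpg')
    poses, scores = openpose.detect(img)
    canvas = draw_person_pose(img, poses)  # returns an RGB canvas
    cv2.imwrite('result.png', cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR))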