-
Notifications
You must be signed in to change notification settings - Fork 314
/
Copy pathplot.py
executable file
·126 lines (107 loc) · 7.07 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env python
import os
import argparse
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# Plot options (change me)
interval_size = 200 # Report performance over the last 200 training steps
max_plot_iteration = 2500 # Maximum number of training steps to report performance
# Parse session directories
parser = argparse.ArgumentParser(description='Plot performance of a session over training time.')
parser.add_argument('session_directories', metavar='N', type=str, nargs='+', help='path to session directories for which to plot performance')
args = parser.parse_args()
session_directories = args.session_directories
# Define plot colors (Tableau palette)
colors = [[078.0/255.0,121.0/255.0,167.0/255.0], # blue
[255.0/255.0,087.0/255.0,089.0/255.0], # red
[089.0/255.0,169.0/255.0,079.0/255.0], # green
[237.0/255.0,201.0/255.0,072.0/255.0], # yellow
[242.0/255.0,142.0/255.0,043.0/255.0], # orange
[176.0/255.0,122.0/255.0,161.0/255.0], # purple
[255.0/255.0,157.0/255.0,167.0/255.0], # pink
[118.0/255.0,183.0/255.0,178.0/255.0], # cyan
[156.0/255.0,117.0/255.0,095.0/255.0], # brown
[186.0/255.0,176.0/255.0,172.0/255.0]] # gray
# Determine whether each session directory is trained in 'reactive' or 'reinforcement' mode (reward schemes differ between methods)
methods = []
for session_directory in session_directories:
# Check name of saved weights
model_list = os.listdir(os.path.join(session_directory, 'models'))
if len(model_list) > 0:
if 'reactive' in model_list[0]:
methods.append('reactive')
elif 'reinforcement' in model_list[0]:
methods.append('reinforcement')
else:
print('Error: cannot determine whether session was trained in \'reactive\' or \'reinforcement\' mode.')
else:
print('Error: no model weights saved, cannot determine whether session was trained in \'reactive\' or \'reinforcement\' mode.')
# Create plot design
plt.ylim((0, 1))
plt.ylabel('Grasping performance (success rate)')
plt.xlim((0, max_plot_iteration))
plt.xlabel('Number of training steps')
plt.grid(True, linestyle='-', color=[0.8,0.8,0.8])
ax = plt.gca()
for axis in ['top','bottom','left','right']:
ax.spines[axis].set_color('#000000')
plt.rcParams.update({'font.size': 18})
plt.rcParams['mathtext.default']='regular'
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
legend = []
for session_idx in range(len(session_directories)):
session_directory = session_directories[session_idx]
method = methods[session_idx]
color = colors[session_idx % 10]
# Get logged data
transitions_directory = os.path.join(session_directory, 'transitions')
executed_action_log = np.loadtxt(os.path.join(transitions_directory, 'executed-action.log.txt'), delimiter=' ')
max_iteration = min(executed_action_log.shape[0] - 2, max_plot_iteration)
executed_action_log = executed_action_log[0:max_iteration,:]
reward_value_log = np.loadtxt(os.path.join(transitions_directory, 'reward-value.log.txt'), delimiter=' ')
reward_value_log = reward_value_log[0:max_iteration]
# Initialize plot variables
grasp_to_push_ratio = np.zeros((max_iteration))
grasp_success = np.zeros((max_iteration))
push_then_grasp_success = np.zeros((max_iteration))
for step in range(max_iteration):
# Get indicies for previous x grasps, where x is the interval size
grasp_attempt_ind = np.argwhere(executed_action_log[:,0] == 1)
grasp_attempt_ind_before_step = grasp_attempt_ind[np.argwhere(grasp_attempt_ind[:,0] < step)]
grasp_attempt_ind_over_interval = grasp_attempt_ind_before_step[max(0,len(grasp_attempt_ind_before_step)-interval_size):len(grasp_attempt_ind_before_step),0]
# Count number of times grasp attempts were successful
# NOTE: reward_value_log just stores some value which is indicative of successful grasping, which could be a class ID (reactive) or actual reward value (from MDP, reinforcement)
if method == 'reactive':
grasp_success_over_interval = np.sum(reward_value_log[grasp_attempt_ind_over_interval] == 0)/float(min(interval_size,max(step,1))) # Class ID for successful grasping is 0 (reactive policy)
elif method == 'reinforcement':
grasp_success_over_interval = np.sum(reward_value_log[grasp_attempt_ind_over_interval] >= 0.5)/float(min(interval_size,max(step,1))) # Reward value for successful grasping is anything larger than 0.5 (reinforcement policy)
if step < interval_size:
grasp_success_over_interval *= (float(step)/float(interval_size))
grasp_success[step] = grasp_success_over_interval
# Get grasp to push ratio over previous x attempts, where x is the interval size
grasp_to_push_ratio_over_interval = float(np.sum(executed_action_log[max(0,step-interval_size):step,0] == 1))/float(min(interval_size,max(step,1)))
grasp_to_push_ratio[step] = grasp_to_push_ratio_over_interval
# Get indicies for push-then-grasp cases
push_attempt_ind = np.argwhere(executed_action_log[0:(max_iteration-1),0] == 0)
grasp_after_push_attempt_ind = push_attempt_ind[np.argwhere(executed_action_log[push_attempt_ind[:,0] + 1,0] == 1),:] + 1
grasp_after_push_attempt_ind_before_step = grasp_after_push_attempt_ind[np.argwhere(grasp_after_push_attempt_ind[:,0] < step)]
grasp_after_push_attempt_ind_over_interval = grasp_after_push_attempt_ind_before_step[max(0,len(grasp_after_push_attempt_ind_before_step)-interval_size):len(grasp_after_push_attempt_ind_before_step),0]
# Count number of times grasp after push attempts were successful
if method == 'reactive':
grasp_after_push_success_over_interval = np.sum(reward_value_log[grasp_after_push_attempt_ind_over_interval] == 0)/float(min(interval_size,max(step,1)))
elif method == 'reinforcement':
grasp_after_push_success_over_interval = np.sum(reward_value_log[grasp_after_push_attempt_ind_over_interval] >= 0.5)/float(min(interval_size,max(step,1)))
if step < interval_size:
grasp_after_push_success_over_interval *= (float(step)/float(interval_size))
push_then_grasp_success[step] = grasp_after_push_success_over_interval
# Plot grasp information
plt.plot(range(0, max_iteration), grasp_success, color=color, linewidth=3) # color='blue', linewidth=3)
# plt.fill_between(range(0, max_iteration), push_then_grasp_success, 0, color=color, alpha=0.5)
plt.plot(range(0, max_iteration), push_then_grasp_success, dashes=[8,7], color=color, linewidth=3, alpha=1, dash_capstyle='round', dash_joinstyle='round',label='_nolegend_') # color='blue', dashes=[5,5], linewidth=2, dash_capstyle='butt')
# plt.plot(range(0, max_iteration), grasp_to_push_ratio, dashes=[1,5], color=color, linewidth=2, alpha=0.5, dash_capstyle='round', dash_joinstyle='round') # color='blue', dashes=[5,5], linewidth=2, dash_capstyle='butt')
legend.append(session_directories[session_idx])
plt.legend(legend, loc='lower right', fontsize=18)
plt.tight_layout()
plt.show()