# interactive_demo.py
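"""Interactive demo of Measure Anything using SAM-2 with point prompts.

Plays back a ZED .SVO recording; on a selected frame the user clicks positive
(and optionally negative) point prompts, SAM-2 segments the object, and
diameter, volume, and length measurements are computed and saved under
./output/<svo_name>/results_frame_<n>/.

Example invocation (path is illustrative):
    python interactive_demo.py --input_svo ./recordings/sample.svo --thin_and_long --stride 10
"""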
import argparse
import os

import cv2
import numpy as np
import pyzed.sl as sl

from MeasureAnything import MeasureAnything
from demo_utils import get_click_coordinates, display_with_overlay, scale_points


def main():
    # Argument parser setup
    parser = argparse.ArgumentParser(
        description="Interactive demo of Measure Anything using SAM-2 with point prompts")
    parser.add_argument('--input_svo', type=str, required=True,
                        help='Path to the input .SVO file')
    parser.add_argument('--thin_and_long', action=argparse.BooleanOptionalAction,
                        help='Flag that decides whether to skeletonize or use the symmetry axis')
    parser.add_argument('--stride', type=int,
                        help='Stride used to calculate line segments')
    args = parser.parse_args()

    # Name the output directory after the input file (without extension)
    directory_name = os.path.split(args.input_svo)[1].split('.')[0]

    # Default stride if none was given on the command line
    stride = args.stride if args.stride else 10
    # Create a ZED camera object
    zed = sl.Camera()

    # Initialize the ZED camera, specify depth mode, minimum distance
    init_params = sl.InitParameters(camera_disable_self_calib=True)
    init_params.set_from_svo_file(args.input_svo)
    init_params.coordinate_units = sl.UNIT.CENTIMETER
    init_params.depth_mode = sl.DEPTH_MODE.NEURAL
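    # NOTE: depth_minimum_distance is interpreted in coordinate_units (centimeters
    # here), so 0.2 means 2 mm; if a 0.2 m near range was intended, use 20 instead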
    init_params.depth_minimum_distance = 0.2
    if zed.open(init_params) != sl.ERROR_CODE.SUCCESS:
        print("Error opening ZED camera")
        return

    # Enable fill mode
    runtime_parameters = sl.RuntimeParameters(enable_fill_mode=True)

    RGB = sl.Mat()
    frame_count = 0
    prompt_data = {'positive_points': [], 'negative_points': [], 'clicked': False}
    # Main loop to extract frames and display video
    while True:
        if zed.grab(runtime_parameters) == sl.ERROR_CODE.SUCCESS:
            frame_count += 1

            # Retrieve the RGB frame
            zed.retrieve_image(RGB, sl.VIEW.LEFT)
            image_ocv = RGB.get_data()
            image_rgb = cv2.cvtColor(image_ocv, cv2.COLOR_BGRA2BGR)

            # Retrieve the depth frame
            depth_for_display = sl.Mat()
            zed.retrieve_image(depth_for_display, sl.VIEW.DEPTH)
            image_depth = depth_for_display.get_data()
            image_depth = image_depth[:, :, 0]
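            # VIEW.DEPTH is an 8-bit visualization of depth, not metric values;
            # channel 0 is kept as a single-channel grayscale image for display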
            # Resize the image for display
            display_width, display_height = 1600, 900
            resized_image = cv2.resize(image_rgb, (display_width, display_height))
            resized_depth = cv2.resize(image_depth, (display_width, display_height))

            # Calculate scale factors for x and y
            scale_x = image_rgb.shape[1] / display_width
            scale_y = image_rgb.shape[0] / display_height
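            # Clicks are collected in display coordinates; these factors map them
            # back to full-resolution image coordinates before prompting SAM-2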
            # Display frame with basic instructions
            instructions = ["Press 's' to select frame"]
            display_with_overlay(image_rgb,
                                 image_depth,
                                 [],
                                 [],
                                 [],
                                 display_dimensions=[display_width, display_height],
                                 diameters=None,
                                 save=False,
                                 save_name="",
                                 mask=None,
                                 overlay_text=instructions)

            # Wait for key input
            key = cv2.waitKey(1)
            if key == ord('q'):  # Quit the loop
                break
            elif key == ord('s'):  # Stop on 's' to select points
                # Set mouse callback for collecting points
                cv2.setMouseCallback("Video Feed", get_click_coordinates, param=prompt_data)

                # Wait for user to select points and press 'c' to continue
                while True:
                    key = cv2.waitKey(1)

                    # Detailed instructions
                    detailed_instructions = [
                        f"Frame: {frame_count}",
                        "'Left-click' to add positive point",
                        "'Ctrl + Left-click' to add negative point",
                        "Press 'c' to continue"
                    ]

                    # Display with current positive and negative points
                    display_with_overlay(resized_image,
                                         None,
                                         prompt_data['positive_points'],
                                         prompt_data['negative_points'],
                                         [],
                                         display_dimensions=[display_width, display_height],
                                         diameters=None,
                                         volume=None,
                                         length=None,
                                         save=False,
                                         save_name="",
                                         mask=None,
                                         overlay_text=detailed_instructions)

                    if key == ord('c'):  # Continue once points are collected
                        break

                # Remove mouse callback
                cv2.setMouseCallback("Video Feed", lambda *unused: None)

                # Scale up the prompts to the original image dimensions
                positive_prompts = scale_points(prompt_data['positive_points'], scale_x, scale_y)
                negative_prompts = (scale_points(prompt_data['negative_points'], scale_x, scale_y)
                                    if prompt_data['negative_points'] else None)
                # Create directory to save results
                if not os.path.exists(f"./output/{directory_name}/results_frame_{frame_count}"):
                    os.makedirs(f"./output/{directory_name}/results_frame_{frame_count}")

                # Save RGB
                cv2.imwrite(f"./output/{directory_name}/results_frame_{frame_count}/rgb.png", image_rgb)

                # Initialize MeasureAnything object ('measurer' avoids shadowing the builtin 'object')
                measurer = MeasureAnything(zed=zed, window=25, stride=stride,
                                           thin_and_long=args.thin_and_long, image_file=None)
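                # Segment the target: SAM-2 is prompted with the scaled positive
                # (and optional negative) clicks on the full-resolution frame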
                measurer.detect_mask(image=image_rgb, positive_prompts=positive_prompts,
                                     negative_prompts=negative_prompts)
                # Process depth frame and save as color
                depth_map_norm = cv2.normalize(image_depth, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
                color_depth_map = cv2.applyColorMap(depth_map_norm, cv2.COLORMAP_JET)
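                # Paint pixels with zero depth (no valid measurement) black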
                color_depth_map[image_depth == 0] = [0, 0, 0]
                cv2.imwrite(f"./output/{directory_name}/results_frame_{frame_count}/depth_map.png",
                            color_depth_map)

                # Process mask and save
                measurer.process_mask()
                processed_mask = measurer.processed_binary_mask
                cv2.imwrite(f"./output/{directory_name}/results_frame_{frame_count}/processed_mask.png",
                            measurer.processed_binary_mask_0_255)
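                # Two skeleton strategies: thin, elongated objects are skeletonized
                # and pruned; otherwise the mask's symmetry axis serves as the skeleton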
                # Skeletonize, obtain skeleton_coordinates
                if args.thin_and_long:
                    measurer.skeletonize_and_prune()
                    measurer.augment_skeleton()
                else:
                    measurer.build_skeleton_from_symmetry_axis()
                # Obtain perpendicular line segment coordinates and their depth
                measurer.calculate_perpendicular_slope()
                line_segment_coordinates, depth = measurer.calculate_line_segment_coordinates_and_depth()

                # Calculate measurements
                diameters = measurer.calculate_diameter(line_segment_coordinates, depth)
                volume, length = measurer.calculate_volume_and_length(line_segment_coordinates, depth)
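                # Diameters, volume, and length presumably follow coordinate_units
                # (centimeters here), since MeasureAnything reads depth from this camera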
                # Save results
                np.save(f"./output/{directory_name}/results_frame_{frame_count}/diameters.npy", diameters)
                np.save(f"./output/{directory_name}/results_frame_{frame_count}/volume.npy", volume)
                np.save(f"./output/{directory_name}/results_frame_{frame_count}/length.npy", length)
                # Display overlay with segmentation and wait for 'c' to continue
                overlay_text = [f"Frame: {frame_count}", "Press 'c' to continue"]
                while True:
                    display_with_overlay(image_rgb,
                                         None,
                                         [],
                                         [],
                                         line_segment_coordinates,
                                         diameters=diameters,
                                         volume=volume,
                                         length=length,
                                         display_dimensions=[display_width, display_height],
                                         save=False,
                                         save_name="",
                                         mask=processed_mask,
                                         overlay_text=overlay_text)
                    key = cv2.waitKey(1)
                    if key == ord('c'):
                        # Save the annotated result image before continuing
                        display_with_overlay(image_rgb,
                                             None,
                                             [],
                                             [],
                                             line_segment_coordinates,
                                             diameters=diameters,
                                             volume=volume,
                                             length=length,
                                             save=True,
                                             display_dimensions=[display_width, display_height],
                                             save_name=f"./output/{directory_name}/results_frame_{frame_count}/final_result.png",
                                             mask=processed_mask,
                                             overlay_text=overlay_text)
                        break

                # Reset prompt data for the next frame
                prompt_data['positive_points'].clear()
                prompt_data['negative_points'].clear()
                prompt_data['clicked'] = False
        else:
            # grab() failed (likely end of the SVO file), so exit; without this
            # the loop would spin forever once playback ends
            break

    # Close camera and windows
    zed.close()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()