try_1.py
# Importing libraries
import os
import cv2
import time
import struct
import socket
import pyaudio
import freenect
import wikipedia
import playsound
import numpy as np
from gtts import gTTS
from scripts.rhino.rhino import *
from scripts.porcupine.porcupine import *
# Function to get an image from the Raspberry Pi over the socket
def get_image(img_type, client):  # 'img_type' selects the RGB image or the depth image
    # Sending the type of image we want
    client.send(img_type.encode())
    # Waiting until we get the file name, which the sender passes first
    file_name = client.recv(1024).decode()
    print(file_name)
    # This will open a new file in the working directory with the same file name
    file = open(file_name, 'wb')
    # Receiving the first 10 bytes
    data = client.recv(10)
    while data:
        #print(data)
        file.write(data)
        data = client.recv(1024)
    file.close()
    print("Data received successfully")
    # NOTE: the loop above only ends when the sender closes the connection,
    # so this socket cannot be reused after the transfer
    client.close()
    # Returning the RGB or depth image
    image = cv2.imread(file_name)
    return image
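# For reference, a minimal sketch of the sender that get_image() expects on the
# Raspberry Pi side. This counterpart is not part of this file; it is an
# assumption inferred from the receive logic above: send the file name first,
# then the raw bytes, then close the connection to mark end-of-file.
def send_image_sketch(conn, file_name):
    # Announce the file name so the receiver can create a matching file
    conn.send(file_name.encode())
    # Hypothetical pause so the name is not coalesced with the data by TCP
    time.sleep(0.1)
    with open(file_name, 'rb') as f:
        chunk = f.read(1024)
        while chunk:
            conn.send(chunk)
            chunk = f.read(1024)
    # Closing makes the receiver's recv() return b'' and end its loop
    conn.close()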
# Function to check whether the frame centre and the object centre are coincident
def co_incident():
    # Stub: alignment logic is not implemented yet
    pass
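# A minimal sketch of what the alignment step above could grow into. This
# helper is an illustrative assumption, not part of the original design: it
# compares a detected object's centre column against the frame centre and
# returns a hypothetical turn command for send() (the original file only
# demonstrates a "forward" command).
def center_offset(cx, W, tolerance=20):
    # Positive offset means the object sits right of the frame centre
    offset = cx - W // 2
    if abs(offset) <= tolerance:
        # Centres coincide within the tolerance; no turn needed
        return None
    return "right" if offset > 0 else "left"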
# Function to go to an object
def goTo(slots, net, LABELS, ln, client):
    # Getting the value of the key in the slots dictionary
    obj = str(slots['ob1'])
    # Initializing the variables
    x = y = z = None
    # Getting the coordinates of the object
    (x, y, z) = getCoordinates(obj, net, LABELS, ln, client)
    # Checking whether the object was found
    if x is None or y is None or z is None:
        # Saying that the object was not found
        print("None here")
        playsound.playsound('not_found.mp3')
    else:
        # Ensuring the centres are coincident
        co_incident()
        print(x, y, z)
        # Moving towards the object
        while z >= 0.0:
            # Move forward and check the distance again
            send(client, "forward")
            time.sleep(1)
            (x, y, z) = getCoordinates(obj, net, LABELS, ln, client)
# Function to get the coordinates of the given object
def getCoordinates(obj, net, LABELS, ln, client):
    while True:
        # Get the RGB image from the Rpi
        frame = get_image("image", client)
        # Get the depth image from the Rpi
        depth = get_image("depth", client)
        # Getting the shape of the frame
        (H, W) = frame.shape[:2]
        # Creating a blob from the image
        blob = cv2.dnn.blobFromImage(frame, 1/255.0, (224, 224), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(ln)
        # Initializing lists for the detections
        boxes = []
        confidences = []
        classIds = []
        # Looping over each layer's output
        for output in layerOutputs:
            # Looping over each detection
            for detect in output:
                # Extracting the class ID and confidence
                score = detect[5:]
                classID = np.argmax(score)
                confidence = score[classID]
                # Filtering out weak detections
                if confidence > 0.5:
                    # Getting the bounding rectangle, scaled back to frame size
                    box = detect[:4] * np.array([W, H, W, H])
                    (centerX, centerY, Width, Height) = box.astype("int")
                    # Getting the top-left corner
                    x = int(centerX - (Width / 2))
                    y = int(centerY - (Height / 2))
                    # Adding to the lists
                    boxes.append([x, y, int(Width), int(Height)])
                    classIds.append(classID)
                    confidences.append(float(confidence))
        # Non-maxima suppression to remove overlapping boxes
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)
        # Checking that at least one detection survived
        if len(idxs) > 0:
            # Looping over the kept indexes
            for i in idxs.flatten():
                x = boxes[i][0]
                y = boxes[i][1]
                w1 = boxes[i][2]
                h1 = boxes[i][3]
                if LABELS[classIds[i]] == obj:
                    # Calculating the object's centre, then converting the raw
                    # Kinect disparity at that pixel to an approximate distance
                    print("Here")
                    cx = int(x + (w1 / 2))
                    cy = int(y + (h1 / 2))
                    cz = 0.1236 * np.tan(depth[cy][cx] / 2842.5 + 1.1863)
                    return (cx, cy, cz)
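# A quick sanity check of the depth conversion above, which is the widely used
# raw-disparity-to-metres approximation for the Kinect (the number here is
# only an illustration, not calibration data):
#   raw = 800  ->  0.1236 * tan(800 / 2842.5 + 1.1863)  =  about 1.2 metres
# Note the formula expects the raw 11-bit disparity value; if the depth image
# is saved and re-read as an 8-bit file, the values may need rescaling first.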
# Function to speak/interact
def speak(slots):
    # Getting the value of the key in the slots dictionary
    keyword = str(slots['p1'])
    # If the keyword is known
    if keyword == "yourself":
        # Declaring the text
        splitted = ["Hey, my name is groooot. I am a cute, cute robooooo. I am designed by Gaurav, Harish and Swati, and I work for them. Nice meeting you. I am here to help you, just spell groooooot."]
    # If the keyword is not known
    else:
        # Searching Wikipedia
        search_result = wikipedia.summary(keyword)
        # Splitting the summary into paragraphs
        splitted = search_result.split("\n")
    # Text-to-speech model
    speech = gTTS(text=splitted[0], lang='en-in', slow=False)
    # Saving the audio file
    speech.save("speak.mp3")
    # Playing the audio file
    playsound.playsound('speak.mp3')
# Function to send a movement command to the motor server
def send(client, direction):
    # Sending the command to the server
    client.send(direction.encode())
    # Waiting for feedback; recv() returns bytes, so compare against bytes.
    # The socket is left open so further commands and image requests can reuse it.
    while client.recv(1024) != b'done':
        pass
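# stop() is invoked from main() below but is never defined in this file; this
# stub is an assumption added so the script can run. A real implementation
# would presumably halt the motors, e.g. by sending a stop command over the
# same socket.
def stop():
    # Placeholder: no motor interface is wired up here
    print("stop requested")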
# Main function
def main():
    # Initializing state flags
    awake = False
    intent_extraction_is_finalized = False
    # Loading the Picovoice models
    rhino_wakeword = Porcupine(library_path="/home/garima/Gaurav/Blog_2/Integrated/res/libpv_porcupine.so",
                               model_file_path="/home/garima/Gaurav/Blog_2/Integrated/res/porcupine_params.pv",
                               keyword_file_paths=["/home/garima/Gaurav/Blog_2/Integrated/res/hey_groot.ppn"],
                               sensitivities=[0.5])
    rhino_commands = Rhino(library_path="/home/garima/Gaurav/Blog_2/Integrated/res/libpv_rhino.so",
                           model_path="/home/garima/Gaurav/Blog_2/Integrated/res/rhino_params.pv",
                           context_path="/home/garima/Gaurav/Blog_2/Integrated/res/robo.rhn")
    # Setting up the audio input stream
    pa = pyaudio.PyAudio()
    audio_stream = pa.open(rate=rhino_commands.sample_rate,
                           channels=1,
                           format=pyaudio.paInt16,
                           input=True,
                           frames_per_buffer=rhino_commands.frame_length)
    # Label, weight and configuration paths for the YOLO model
    labelPath = os.path.sep.join(["yolo-coco", "coco.names"])
    weightPath = os.path.sep.join(["yolo-coco", "yolov3.weights"])
    configPath = os.path.sep.join(["yolo-coco", "yolov3.cfg"])
    # Loading the labels
    LABELS = open(labelPath).read().strip().split("\n")
    # Loading YOLO
    net = cv2.dnn.readNetFromDarknet(configPath, weightPath)
    # Determining the YOLO output layer names
    # (newer OpenCV versions return a flat array from getUnconnectedOutLayers(),
    # in which case the index would be ln[i - 1] instead)
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
    # Setting up the Rpi GPIO pin numbering
    #GPIO.setmode(GPIO.BOARD)
    # Declaring the pin modes
    #GPIO.setup(3, GPIO.OUT)
    #GPIO.setup(5, GPIO.OUT)
    #GPIO.setup(11, GPIO.OUT)
    #GPIO.setup(13, GPIO.OUT)
    # Making the commonly used audio files (text to speech)
    wake = gTTS(text="At your service friend!", lang="en-in", slow=False)
    error = gTTS(text="I'm tired! I will take a nap.", lang="en-in", slow=False)
    not_found = gTTS(text="Object not found!", lang="en-in", slow=False)
    not_understood = gTTS(text="I don't understand your order, friend", lang="en-in", slow=False)
    # Saving the audio files
    wake.save("wake.mp3")
    error.save("error.mp3")
    not_found.save("not_found.mp3")
    not_understood.save("unclear.mp3")
    # Initializing the socket
    network = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Initializing the port
    port = 12345
    network.bind(('', port))
    hostname = socket.gethostname()
    IPaddr = socket.gethostbyname(hostname)
    network.listen(5)
    # Getting the client host name and IP address details
    client, addr = network.accept()
    print("Start")
    # Detecting commands in a continuous loop
    while True:
        # Reading the audio input
        pcm = audio_stream.read(rhino_commands.frame_length)
        pcm = struct.unpack_from("h" * rhino_commands.frame_length, pcm)
        try:
            # If the wake word has not been spoken yet
            if not awake:
                # Processing the voice input
                result = rhino_wakeword.process(pcm)
                # If the wake word is the input, result is true
                if result:
                    # Wake word detected
                    awake = True
                    time.sleep(0.1)
                    print("awake")
                    #playsound.playsound('wake.mp3')
                    #os.system('mpg321 wake.mp3')
                    #time.sleep(5)
                    print("Speak More")
            elif not intent_extraction_is_finalized:
                # Feeding frames to Rhino until intent extraction is finalized
                intent_extraction_is_finalized = rhino_commands.process(pcm)
            else:
                # If the command is understood
                if rhino_commands.is_understood():
                    # Getting the intent and its slots
                    intent, slots = rhino_commands.get_intent()
                    print(intent)
                    playsound.playsound('wake.mp3')
                    #os.system('mpg321 wake.mp3')
                    # Checking the intent and taking the necessary action
                    # If going to an object is the intent
                    if intent == "goTo":
                        # Shift control to the goTo function
                        goTo(slots, net, LABELS, ln, client)
                    # If speaking is the intent
                    elif intent == "speak":
                        # Shift control to the speak function
                        speak(slots)
                    # If coming back is the intent
                    #elif intent == "comeBack":
                    #    # Shift control to the comeBack function
                    #    comeBack(slots)
                    # If stopping is the intent
                    elif intent == "stop":
                        # Shift control to the stop function
                        stop()
                    # No match
                    else:
                        # Command not recognised
                        time.sleep(0.1)
                        print("1")
                        playsound.playsound('unclear.mp3')
                # Command not understood
                else:
                    #print("Command not understood")
                    time.sleep(0.1)
                    playsound.playsound('unclear.mp3')
                # Resetting Rhino to detect a new command
                rhino_commands.reset()
                awake = False
                intent_extraction_is_finalized = False
        except Exception as e:
            print(e)
            time.sleep(0.1)
            playsound.playsound('error.mp3')
            exit()
            #os.system('python3 try_1.py')
# Calling the main function
if __name__ == "__main__":
    main()