inference.py
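
"""Real-time facial emotion detection from a webcam feed.

Detects faces in each camera frame with an OpenCV Haar cascade, then
classifies every detected face with the trained EmotionDetector model
(defined in model.py) into one of five emotion classes.
"""
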
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import model
# Define data transforms for input images
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Define class labels
classes = ["Angry", "Fear", "Happy", "Sadness", "Surprise"]

# Load the trained model onto the available device; map_location keeps a
# GPU-trained checkpoint loadable on CPU-only machines
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.EmotionDetector()
model.load_state_dict(torch.load("./output/emotion_detection_model_grayscale_best.pth",
                                 map_location=device))
model.to(device)
model.eval()

# Load the Haar cascade face detector once, before entering the capture loop
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

# Open the camera device
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open camera device")
    exit()

# Process each frame from the camera
while True:
    # Read a frame from the camera
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to grayscale and detect faces with the Haar cascade;
    # scaleFactor=1.05 scans the image pyramid finely, and minNeighbors=8
    # filters out weak, overlapping detections
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.05, minNeighbors=8)

    # Process each detected face
    for (x, y, w, h) in faces:
        # Draw a border around the face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Crop the face region and preprocess it
        face_img = gray[y:y + h, x:x + w]
        pil_img = Image.fromarray(face_img)
        img_tensor = data_transforms(pil_img)
        img_tensor = torch.unsqueeze(img_tensor, 0)

        # Classify the face using the trained model
        with torch.no_grad():
            img_tensor = img_tensor.to(device)
            outputs = model(img_tensor)
            _, predicted = torch.max(outputs.data, 1)
            # Softmax over the raw logits gives the predicted class probability
            confidence = np.exp(outputs[0][predicted].cpu().numpy()) / np.exp(outputs[0].cpu().numpy()).sum()

        # Show the predicted class and confidence level above the face border
        cv2.putText(frame, f"{classes[predicted]} ({float(confidence):.2f})", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Show the processed frame in the OpenCV window
    cv2.imshow("Emotion Detection", frame)

    # Wait for a key press and check if the user wants to quit
    key = cv2.waitKey(1) & 0xFF
    if key == ord("q"):
        break

# Release the camera device and close the OpenCV window
cap.release()
cv2.destroyAllWindows()
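
# Usage sketch (assumptions: run from the repo root so the ./output
# checkpoint path resolves, and a webcam is available at index 0):
#   python inference.py
# Press "q" in the OpenCV window to quit.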