-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathllm_feedback.py
234 lines (186 loc) · 9.82 KB
/
llm_feedback.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import math
import os
import re
import traceback
import cv2
import google.generativeai as genai
import PIL
import streamlit as st
# Module-level Gemini model handles shared by every feedback function below.
# NOTE(review): assumes genai.configure(api_key=...) happens elsewhere before
# these are used — confirm against the app's entry point.
text_model = genai.GenerativeModel("gemini-pro")
vision_model = genai.GenerativeModel("gemini-pro-vision")
def get_speech_topic():
    """Generate a speaking topic from the user's chosen themes.

    Reads the themes from ``st.session_state["theme-input"]``, asks the text
    model for a single topic, and stores the result in
    ``st.session_state["speech-topic"]``.  If the model call fails for any
    reason, a generic fallback topic is stored instead so the app keeps
    working.
    """
    try:
        topic_prompt = f"""Context: The assistant receives some themes and it has to return a topic related to one of the themes for the user to speak.
Eg: if the theme is "Holiday" then the topic could be, "Talk about your last vacation"
If the theme is "Cricket", then the topic could be, "What's your unforgettable cricket moment".
Just give the topic, nothing else
Themes: {st.session_state['theme-input']}
"""
        response = text_model.generate_content(topic_prompt)
        speech_topic = response.text.strip()
    except Exception:
        # Best-effort fallback: log the full traceback, keep the UI alive.
        speech_topic = "Tell me about yourself"
        print(f"Error in generating topic: {traceback.format_exc()}")
    st.session_state["speech-topic"] = speech_topic
def process_video():
    """Tile sampled video frames into composite grid images for the vision model.

    Reads raw frames from ``st.session_state["raw_video_frames"]`` and the
    frame count from ``st.session_state["num_frames"]``, subsamples them so at
    most 16 grid images (5x5 tiles of 100x100 px each) are produced, and
    returns the list of composite ``PIL.Image`` objects.

    NOTE(review): assumes each raw frame is a NumPy array accepted by
    ``PIL.Image.fromarray`` — confirm against the capture code.
    """
    image_size = (100, 100)  # size of each tile in the grid
    rows, columns = 5, 5
    images_per_output = rows * columns  # 25 tiles per composite image
    frames = st.session_state["raw_video_frames"]
    # Sample every skip_frames-th frame so the total fits in 16 composites.
    # max(1, ...) guards against num_frames == 0, where ceil() would yield a
    # 0 step and frames[::0] would raise ValueError.
    skip_frames = max(
        1, math.ceil(st.session_state["num_frames"] / (images_per_output * 16))
    )
    # BUGFIX: iterate the sampled frames themselves.  The original looped
    # ``for i in range(len(frames[::skip_frames]))`` but indexed ``frames[i]``,
    # which took the first N *consecutive* frames instead of an evenly spaced
    # sample across the whole video.
    final_frames = [PIL.Image.fromarray(frame) for frame in frames[::skip_frames]]
    total_images = min(math.ceil(len(final_frames) / images_per_output), 16)
    new_images = []
    frame_number = 0
    for i in range(total_images):
        # Blank canvas for one rows x columns grid of tiles.
        new_image = PIL.Image.new(
            "RGB", (image_size[0] * columns, image_size[1] * rows)
        )
        for _ in range(images_per_output):
            if frame_number == len(final_frames):
                break
            frame = final_frames[frame_number].resize(image_size)
            # Position of this tile within the current grid: row from the
            # tile index relative to this grid, column from frame_number
            # modulo columns (valid because images_per_output % columns == 0).
            row_position = (
                (frame_number - (i * images_per_output)) // columns
            ) * image_size[1]
            col_position = (frame_number % columns) * image_size[0]
            new_image.paste(frame, (col_position, row_position))
            frame_number += 1
        new_images.append(new_image)
    print("Images created successfully.")
    return new_images
def generate_video_feedback():
    """Render body-language feedback for the recorded video in the Streamlit UI.

    Tiles the captured frames via ``process_video``, sends them with an
    analysis prompt to the vision model, and writes the resulting feedback
    (or an error message on failure) under a "Video Feedback" subheader.
    """
    try:
        user_images = process_video()
        img_prompt = """Context: The assistant receives a tiled series of screenshots from a user's live video feed. These screenshots represent sequential frames from the video, capturing distinct moments. The assistant is to analyze these frames as a continuous video feed.
1. Address the user directly, and assume that what is shown in the images is what the user is doing.
2. Use "you" and "your" to refer to the user.
3. DO NOT mention a series of individual images, a strip, a grid, a pattern or a sequence. Do as if the user and the assistant were both seeing the video.
4. Keep in mind that the grid of images will show the same object in a sequence of time. E.g. If there are multiple people in different part of the images, it is the same person and NOT multiple people.
5. Analyze the video of the person and give them feedback on how to improve their body language with respect to public speaking. Only analyze the video, not the audio
6. Tell them what they did right while speaking, and then give them feedback as to what could be improved.
7. Finally conclude your feedback.
"""
        response = vision_model.generate_content(contents=[img_prompt, *user_images])
        response.resolve()
        video_feedback = response.text.strip()
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # still propagate; also fixed the "Excption" typo in the log message.
        print(f"Exception in generating video feedback: {traceback.format_exc()}")
        video_feedback = "Error in generating video feedback!"
    st.subheader("Video Feedback")
    st.write(video_feedback)
    st.divider()
def get_grammarian_feedback(speech_text, topic):
    """Return language-coach feedback on the transcribed speech.

    Builds a "grammarian" prompt from *speech_text* and the assigned *topic*,
    sends it to the text model, and returns the stripped response text.
    Returns an empty string if the model call fails.

    NOTE(review): the prompt's instruction mentioning "images" looks
    copy-pasted from the vision prompt — confirm whether it was intended here.
    """
    grammarian_prompt_template = f"""Imagine you're a dedicated language coach for a public speaking skills improvement club, equipped with the vast knowledge about vocabulary, grammar and languages. Your mission is to empower each member to become a confident and captivating speaker.
Context: The assistant receives the text of the speech by the user, and the topic on which the user was supposed to speak. Given the text and the topic, here are the things you have to give feedback on:
Instructions: 1. Address the user directly, and assume that what is shown in the images is what the user is doing. 2. Use "you" and "your" to refer to the user.
Wordsmith Spotlight:
Did the speaker utilize any unique or under-appreciated words that deserve a shout-out? Share their "Word of the Speech" moment, explaining its meaning and showcasing its potential to enrich future speeches.
Did you encounter any instances of particularly vivid or impactful phrasing? Highlight these gems and encourage similar strategies.
Fluency Feedback:
How smoothly did the speech flow? Did transitions feel natural, or were there any areas that could benefit from tighter phrasing or bridge sentences? Offer specific suggestions for improved coherence and pacing.
Did you notice any patterns in sentence structure or word choice? Identify areas where speaker shines and suggest techniques to inject further variety and dynamism into their delivery.
Grammar Guardian:
Were there any grammatical errors or awkward constructions that could be gently addressed? Craft a private message for speaker providing alternative word choices, clearer sentence structures, and polished punctuation suggestions. Remember, constructive and positive feedback is key!
Relevance to the Topic:
Did the user speak on the assigned topic?
Learning Lighthouse:
Based on the speech, what specific online resources or language learning activities would benefit speaker most? Recommend tailored materials that address any areas for improvement while also aligning with their individual learning style and interests.
Overall:
Provide a personalized and encouraging evaluation that celebrates speaker's strengths while gently guiding them towards even greater linguistic mastery. Remember, your goal is to empower them to find their voice, refine their message, and deliver impactful presentations with increased confidence and clarity.
Topic given to the user: {topic}
Here is the speech given by the user
```
{speech_text}
```
"""
    try:
        grammarian_response = text_model.generate_content(grammarian_prompt_template)
        return grammarian_response.text.strip()
    except Exception:
        # Dropped the unused ``as e`` binding; the traceback carries the detail.
        print(f"Exception in grammarian feedback => {traceback.format_exc()}")
        return ""
def count_filler_words(text):
    """Count occurrences of common filler words and phrases in *text*.

    Matching is case-insensitive and ignores punctuation.  Returns a dict
    mapping every tracked filler (including those with zero hits) to its
    count, ordered by count descending.
    """
    filler_words = [
        "um",
        "ah",
        "er",
        "so",
        "like",
        "you know",
        "well",
        "i mean",
        "actually",
        "basically",
        "literally",
        "okay",
        "right",
        "sort of",
        "kind of",
        "i guess",
        "i think",
        "i believe",
        "i hope",
        "i feel",
        "you see",
        "you know what",
        "you know",
    ]
    filler_word_counts = {}
    # Strip punctuation and lowercase so matching ignores case and punctuation.
    cleaned_text = re.sub(r"[^a-zA-Z0-9\s]", "", text).lower()
    for phrase in filler_words:
        # BUGFIX: the original used cleaned_text.split().count(word), which can
        # never match multi-word fillers such as "you know" or "i mean" because
        # split() yields single tokens.  Match whole words/phrases with a
        # word-boundary regex instead (\s+ tolerates newlines between words).
        pattern = r"\b" + r"\s+".join(re.escape(w) for w in phrase.split()) + r"\b"
        filler_word_counts[phrase] = len(re.findall(pattern, cleaned_text))
    return dict(sorted(filler_word_counts.items(), key=lambda x: x[1], reverse=True))
def get_ah_counter_feedback(speech_text):
    """Return "ah-counter" feedback summarizing the user's filler-word usage.

    Counts filler words in *speech_text* via ``count_filler_words``, asks the
    text model to turn the counts into user-facing feedback, and returns the
    stripped response.  Returns an empty string if the model call fails.
    """
    try:
        filler_word_counts = count_filler_words(text=speech_text)
        ah_counter_prompt_template = f"""Your job is to keep the user aware of how many filler words or empty words they are using in their speech.
Here is a dictionary of counts of filler words used by the user. Just give the feedback to the user. Highlight the top 5 or 10 filler words used by them. Only highligh the filler words used by the speaker. Don't tell them what they didn't speak.
```
{filler_word_counts}
```
"""
        ah_counter_response = text_model.generate_content(ah_counter_prompt_template)
        return ah_counter_response.text.strip()
    except Exception:
        # Dropped the unused ``as e`` binding; the traceback carries the detail.
        print(f"Exception in ah counter feedback => {traceback.format_exc()}")
        return ""
def generate_text_feedback(speech_text, topic):
    """Render the text-based speech feedback section in the Streamlit UI.

    Shows an "ah-counter" summary of filler-word usage followed by the
    grammarian's language feedback for the given speech and topic.
    """
    st.subheader("Speech Feedback")
    # Filler-word feedback first, then the grammarian's evaluation.
    st.write(get_ah_counter_feedback(speech_text=speech_text))
    st.write(get_grammarian_feedback(speech_text=speech_text, topic=topic))