mac_transcibe.py

"""
This script is designed for transcribing audio files and generating concise summaries of the transcribed text. It first checks for necessary dependencies, including ffmpeg and Python packages like 'whisper' and 'transformers'. The script can transcribe audio files to text using the Whisper model. It then uses the Llama model (Mistral 7B Instruct version) to analyze and summarize the transcribed text, producing a concise summary of the key points and topics discussed in the audio.
Functions:
- check_dependencies(): Verifies the presence of required software and packages.
- transcribe_audio(file_path): Transcribes the audio from the given file path using the Whisper model.
- analyze_and_summarize(text, llm): Analyzes and summarizes the provided text using the Llama model.
- main(): Orchestrates the flow of the script from dependency checking, audio transcription, to text summarization.
"""

import subprocess
import sys
import os

# whisper and llama_cpp are imported inside the functions that use them, so
# that check_dependencies() can report a missing package with an install hint
# instead of the script failing at import time.

def check_dependencies():
    """
    Checks for the presence of required dependencies: ffmpeg and the Python
    packages this script imports. Exits with an error message if a dependency
    is missing.
    """
    # Check for ffmpeg (Whisper shells out to it to decode audio)
    try:
        subprocess.run(['ffmpeg', '-version'],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except FileNotFoundError:
        print("ffmpeg is required.\nTo install ffmpeg:")
        print("  - On macOS: 'brew install ffmpeg'")
        sys.exit(1)

    # Check for Python packages
    try:
        import whisper  # noqa: F401
        import llama_cpp  # noqa: F401
    except ImportError as e:
        missing_package = e.name  # more reliable than parsing the error message
        print(f"Dependency missing: {missing_package}")
        if missing_package == 'whisper':
            print("To install, run: pip install git+https://github.com/openai/whisper.git")
        elif missing_package == 'llama_cpp':
            print("To install, run: pip install llama-cpp-python")
        else:
            print(f"To install, run: pip install {missing_package}")
        sys.exit(1)

def transcribe_audio(file_path):
    """
    Transcribes the audio from the specified file path using the Whisper model.

    Args:
    - file_path (str): Path to the audio file to be transcribed.

    Returns:
    - str: Transcribed text of the audio.

    Exits with an error message if transcription fails.
    """
    try:
        import whisper
        model = whisper.load_model("base")
        return model.transcribe(file_path)["text"]
    except Exception as e:
        print(f"Error during transcription: {e}")
        sys.exit(1)
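
# Example (hypothetical file name; any format ffmpeg can decode should work):
#     text = transcribe_audio("meeting.mp3")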

def analyze_and_summarize(text, llm):
    """
    Analyzes and summarizes the given text using the Mistral model.

    Args:
    - text (str): Text to be summarized.
    - llm (Llama): Initialized Llama model.

    Returns:
    - str: Summarized text.

    The function wraps the text in a Mistral-style instruction prompt, runs
    inference, and retrieves the summarized output. It also writes the summary
    to summary.txt.
    """
    prompt = (
        "<s>[INST] You are tasked with creating concise meeting notes from "
        "transcribed audio. For each distinct topic discussed in the meeting, "
        "summarize it into a paragraph as a topic covered. Include an overview, "
        "key points, decisions, and action items related to that topic. Omit "
        "any small talk or non-essential discussions from the summary, such as "
        "conversations about the weather or personal matters not relevant to "
        "the meeting's objectives. Focus solely on the core topics and "
        f"actionable insights. Transcript: {text} [/INST]"
    )  # No trailing </s>: the model emits it itself when the answer is done.
    # Inference using the Llama model
    output = llm(
        prompt,
        max_tokens=None,  # generate until </s> or the context window is full
        stop=["</s>"],
        echo=False
    )
    # Extract text from the output
    if isinstance(output, dict) and 'choices' in output:
        response = output['choices'][0]['text'] if output['choices'] else "No response generated."
    else:
        response = str(output)  # Fall back to a string if the format is unexpected
    # Write summary to a file
    with open('summary.txt', 'w') as file:
        file.write(response)
    return response
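
# Example (a sketch; assumes `llm` was initialized as in main() below):
#     summary = analyze_and_summarize(transcribed_text, llm)
#     # Also writes the summary to summary.txt as a side effect.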

def main():
    """
    Main function to orchestrate the script's operations: checking
    dependencies, transcribing audio, and summarizing the transcription.
    """
    check_dependencies()
    from llama_cpp import Llama  # safe to import now that dependencies are verified

    file_path = input("Please enter the path to your audio file (e.g. an MP3): ").strip()
    transcription_file = os.path.splitext(file_path)[0] + '_transcription.txt'
    transcribed_text = ''
    if os.path.exists(transcription_file):
        use_existing = input(f"Transcription file {transcription_file} already exists. Use it (y/n)? ")
        if use_existing.strip().lower() != 'y':
            transcribed_text = transcribe_audio(file_path)
            with open(transcription_file, 'w') as f:
                f.write(transcribed_text)
        else:
            with open(transcription_file, 'r') as f:
                transcribed_text = f.read()
    else:
        transcribed_text = transcribe_audio(file_path)
        with open(transcription_file, 'w') as f:
            f.write(transcribed_text)

    # Model file existence check
    model_path = "./mistral-7b-instruct-v0.2.Q4_K_M.gguf"
    if not os.path.exists(model_path):
        print(f"Model file not found at {model_path}.")
        print("Please download the model file before proceeding.")
        print("Use the following command to download the model file:")
        print(f"huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF {os.path.basename(model_path)} --local-dir . --local-dir-use-symlinks False")
        sys.exit(1)

    # Initialize the Llama model; on Apple Silicon, llama.cpp uses Metal for
    # the layers offloaded via n_gpu_layers.
    llm = Llama(
        model_path=model_path,
        n_ctx=32768,      # Max context window for the model
        n_threads=8,      # Number of CPU threads to use
        n_gpu_layers=35   # Number of layers to offload to the GPU
    )

    # Analyze and summarize the transcription
    summary = analyze_and_summarize(transcribed_text, llm)
    print("Summary:", summary)


if __name__ == "__main__":
    main()