From 79371c05d80e8c3cc1dfe79f3142b4511794ce1d Mon Sep 17 00:00:00 2001 From: Stefano Fancello Date: Sat, 22 Jun 2024 08:39:55 +0200 Subject: [PATCH] Add LANGUAGE environment variable to process videos in languages other than English --- README.md | 2 ++ src/ytsummary.py | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7a6edec..dbe9f51 100644 --- a/README.md +++ b/README.md @@ -86,5 +86,7 @@ https://www.youtube.com/@lexfridman https://www.youtube.com/@hubermanlab ``` +`LANGUAGE`: language code of the transcript to fetch. Defaults to "en". + then wait ⏱️ (cron is launched every hour and take videos published during the previous hour) diff --git a/src/ytsummary.py b/src/ytsummary.py index c1b74f9..c387002 100644 --- a/src/ytsummary.py +++ b/src/ytsummary.py @@ -1,9 +1,10 @@ from langchain_community.document_loaders import YoutubeLoader from langchain.chains.summarize import load_summarize_chain from langchain_openai import ChatOpenAI +import os def get_summary(url): - loader = YoutubeLoader.from_youtube_url(url) + loader = YoutubeLoader.from_youtube_url(url, language=os.getenv("LANGUAGE", "en")) documents = loader.load() llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo") @@ -24,7 +25,7 @@ def get_summary(url): return summary['output_text'] def get_summary2(url): - loader = YoutubeLoader.from_youtube_url(url) + loader = YoutubeLoader.from_youtube_url(url, language=os.getenv("LANGUAGE", "en")) documents = loader.load() llm = ChatOpenAI(temperature=0, model_name="gpt-4o") @@ -43,7 +44,7 @@ def get_summary2(url): return "\n".join([s['output_text'] for s in summary]) def get_summary3(url): - loader = YoutubeLoader.from_youtube_url(url) + loader = YoutubeLoader.from_youtube_url(url, language=os.getenv("LANGUAGE", "en")) documents = loader.load() llm = ChatOpenAI(temperature=0, model_name="gpt-4o") @@ -66,7 +67,7 @@ def get_summary3(url): Identify: Recognize all distinct concepts mentioned in the text. 
For each concept, Reword and Rephrase the concept if necessary to ensure clarity and conciseness. Output the summary of each concept and the conclusions and points of view of the discussion about it without any title in plain text. -Ensure that the output is comprehensive and captures every idea presented in the text. +Ensure that the output is comprehensive and captures every idea presented in the text. Use same language as the text. Text: {text}