program works, need some cleaning up

This commit is contained in:
2024-02-13 22:02:56 -05:00
parent 5b24fd69e1
commit 5d6d910439
2 changed files with 60 additions and 0 deletions

BIN
audio.mp4 Normal file

Binary file not shown.

60
main.py
View File

@@ -0,0 +1,60 @@
# YouTube Video Summarizer
# Steps: download the audio of a YouTube video, transcribe it with an OpenAI speech-to-text model, then summarize the transcript with a ChatGPT model.
import os
from openai import OpenAI
from pytube import YouTube
def download_audio(video_url, output_filename='audio.mp4'):
    """Download the audio-only stream of a YouTube video to a local file.

    Args:
        video_url: URL of the YouTube video to fetch.
        output_filename: Name of the file the audio is written to.

    Returns:
        The filename returned by the stream's ``download`` call, or ``None``
        when the download failed (the reason is printed).
    """
    try:
        yt = YouTube(video_url)
        audio_stream = yt.streams.get_audio_only()
        # pytube documents get_audio_only() as returning an optional stream;
        # report a missing stream plainly rather than letting it surface as
        # an AttributeError on None below.
        if audio_stream is None:
            print("Failed to download audio: no audio-only stream available")
            return None
        audio_filename = audio_stream.download(filename=output_filename)
        print(f"Downloaded '{yt.title}' audio to {audio_filename}")
        return audio_filename
    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported and
        # signalled to the caller through the None return value.
        print(f"Failed to download audio: {e}")
        return None
def transcription(OPENAI_API_KEY, audio_filename):
    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

    Args:
        OPENAI_API_KEY: API key used to construct the OpenAI client.
        audio_filename: Path of the audio file to upload.

    Returns:
        The object returned by ``client.audio.transcriptions.create`` for a
        ``response_format="json"`` request.
    """
    client = OpenAI(api_key=OPENAI_API_KEY)
    # Context manager guarantees the file handle is released even when the
    # API call raises.
    with open(audio_filename, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-1",
            language="en",
            prompt="",
            response_format="json",
            temperature=0.0,
        )
    print("Transcription finished")
    return transcript
def summarize(OPENAI_API_KEY, transcript):
    """Stream a GPT-4 summary of a video transcript to standard output.

    Args:
        OPENAI_API_KEY: API key used to construct the OpenAI client.
        transcript: Either the transcription response object (its ``text``
            attribute is used) or any value whose ``str()`` is the
            transcript text.

    Returns:
        None. The summary is printed incrementally as chunks arrive.
    """
    # Send the plain transcript text; str() on the response object would
    # wrap the text in the object's repr instead.
    transcript_text = getattr(transcript, "text", None)
    if transcript_text is None:
        transcript_text = str(transcript)

    client = OpenAI(api_key=OPENAI_API_KEY)
    stream = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "The prompt you will receive will be the transcript of a youtube video, your objective is to summarize the content of that transcript to the best of your ability, keep in mind there may be music or other parasit noises in the transcript."},
            {"role": "user", "content": transcript_text},
        ],
        stream=True,
    )
    for chunk in stream:
        # Some stream chunks carry no choices; skip them instead of
        # indexing into an empty list.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            print(piece, end="", flush=True)
    # Terminate the streamed output so later terminal output starts on its
    # own line.
    print()
def main():
    """Run the summarizer: prompt for a URL, download, transcribe, summarize.

    Reads the API key from the ``OPENAI_API_KEY`` environment variable.

    Raises:
        SystemExit: If the API key is not set or the audio download failed.
    """
    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
    # Fail before doing any download work when the key is missing.
    if not OPENAI_API_KEY:
        raise SystemExit("OPENAI_API_KEY environment variable is not set")

    video_url = input("video url:").strip()
    audio_filename = download_audio(video_url)
    # download_audio signals failure with None (it has already printed the
    # reason); stop here rather than passing None on to open().
    if audio_filename is None:
        raise SystemExit(1)

    transcript = transcription(OPENAI_API_KEY, audio_filename)
    summarize(OPENAI_API_KEY, transcript)


if __name__ == "__main__":
    main()