# youtubesummarizer/main.py

import os

from openai import OpenAI
from pytube import YouTube

def get_api_key():
    """Return the OpenAI API key from the ``OPENAI_API_KEY`` environment variable.

    Returns:
        The key with surrounding whitespace removed, or None when the
        variable is unset or contains only whitespace.
    """
    # os.environ.get returns None for a missing variable instead of raising,
    # so no exception handling is needed here.
    api_key = os.environ.get('OPENAI_API_KEY')
    if api_key is None:
        return None

    # A blank value (or one with a stray newline from a shell export) is not a
    # usable key; normalize it so callers can rely on a simple None check.
    api_key = api_key.strip()
    return api_key or None

# Downloads the audio from a YouTube video.
def download(video_url, output_filename='audio.mp4'):
    """Download the audio track of a YouTube video to a local file.

    Args:
        video_url: URL of the YouTube video.
        output_filename: File name passed to the stream's ``download`` call.

    Returns:
        The value returned by the stream's ``download`` call (callers use it
        as the path of the audio file), or None if anything fails. Failures
        are reported on stdout rather than raised.
    """
    try:
        print("Downloading audio...")
        yt = YouTube(video_url)
        audio_stream = yt.streams.get_audio_only()
        # No matching stream yields None; report that explicitly instead of
        # letting it surface as an opaque AttributeError message.
        if audio_stream is None:
            print("Failed to download audio: no audio-only stream is available for this video.")
            return None
        audio_filename = audio_stream.download(filename=output_filename)
        print(f"Downloaded '{yt.title}' audio.")
        return audio_filename
    except Exception as e:
        print(f"Failed to download audio: {e}")
        return None

# Transcribes the audio file using OpenAI's transcription service.
def transcription(OPENAI_API_KEY, audio_filename):
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    Args:
        OPENAI_API_KEY: API key passed to the OpenAI client.
        audio_filename: Path of the audio file to upload.

    Returns:
        The object returned by ``client.audio.transcriptions.create``, or
        None if anything fails. Failures are reported on stdout rather than
        raised.
    """
    try:
        print("Transcriptiting audio...")

        client = OpenAI(api_key=OPENAI_API_KEY)

        # The context manager closes the file even when the request fails.
        # A handle left open leaks and can prevent the file from being
        # deleted afterwards on platforms that lock open files.
        with open(audio_filename, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                file=audio_file,
                model="whisper-1",
            )

        print("Transcription finished.")
        return transcript
    except Exception as e:
        print(f"Failed to transcript audio: {e}")
        return None

# Generates a summary for a given transcript using GPT-4.
def summary(OPENAI_API_KEY, transcript):
    """Stream a GPT-4 summary of a transcript to stdout.

    Args:
        OPENAI_API_KEY: API key passed to the OpenAI client.
        transcript: The transcription result. When it exposes a ``text``
            attribute that text is sent to the model; otherwise
            ``str(transcript)`` is used, so a plain string works too.

    Returns:
        None. The summary is printed as it streams in, and failures are
        reported on stdout rather than raised.
    """
    try:
        print("Summarizing transcript...")

        # Send only the spoken text: str() of a response object may include
        # the object's wrapper/field names rather than just the transcript.
        transcript_text = getattr(transcript, "text", None)
        if transcript_text is None:
            transcript_text = str(transcript)

        client = OpenAI(api_key=OPENAI_API_KEY)
        stream = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "The prompt you will receive will be the transcript of a youtube video, your objective is to summarize the content of that transcript to the best of your ability, keep in mind there may be music or other parasit noises in the transcript."},
                {"role": "user", "content": transcript_text},
            ],
            stream=True,
        )

        for chunk in stream:
            # Skip chunks that carry no choices instead of failing on [0].
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece is not None:
                print(piece, end="", flush=True)

        print("\nSummarizing finished.")
    except Exception as e:
        print(f"Failed to summarize transcript: {e}")
        return None

# Deletes the specified audio file.
def cleanup(audio_filename):
    """Remove the downloaded audio file, reporting progress on stdout.

    Args:
        audio_filename: Path of the file to delete.

    Returns:
        None, whether or not the deletion succeeded; a failure is printed
        instead of being raised.
    """
    try:
        print("Deleting audio...")
        os.remove(audio_filename)
        print("Deleted audio.")
    except Exception as err:
        print(f"Failed to delete audio: {err}")

# Main function to orchestrate the download, transcription, summarization, and cleanup process.
def main():
    """Run the download -> transcription -> summary -> cleanup pipeline.

    Prompts for a video URL on stdin, prints progress and the summary on
    stdout, and always waits for Enter before returning.
    """
    try:
        OPENAI_API_KEY = get_api_key()
        # Check the key before downloading anything: without it both the
        # transcription and the summary are bound to fail.
        if not OPENAI_API_KEY:
            print("OpenAI API key not found: set the OPENAI_API_KEY environment variable.")
            return

        video_url = input("Video url: ").strip()
        if not video_url:
            print("No video url provided.")
            return

        audio_filename = download(video_url)
        if audio_filename is None:
            return

        try:
            transcript = transcription(OPENAI_API_KEY, audio_filename)
            if transcript is not None:
                summary(OPENAI_API_KEY, transcript)
        finally:
            # Remove the downloaded file even if a later step is interrupted.
            cleanup(audio_filename)
    except Exception as e:
        print(e)
    finally:
        input("Press Enter to exit...")

if __name__ == "__main__":
    main()