Added a compress_audio function to keep audio files under the OpenAI API's 25 MB upload limit.

2024-02-14 14:56:00 -05:00
parent 1a8da620d2
commit 4084eab4ae
2 changed files with 30 additions and 3 deletions
--- a/main.py
+++ b/main.py
@@ -2,6 +2,7 @@
 import os

 from openai import OpenAI 
+from pydub import AudioSegment
 from pytube import YouTube

 def get_api_key():
@@ -31,6 +32,31 @@ def download(video_url, output_filename='audio.mp4'):
        print(f"\033[91mFailed to download audio: {e}\033[0m")
        return None

+# Compresses an audio file into a mono MP3 sized for roughly target_size_mb megabytes.
+# Returns the path of the new file, or None if loading or exporting the audio fails.
+def compress_audio(input_filename, target_size_mb=25):
+    try:
+        print("Compressing audio...")
+        audio = AudioSegment.from_file(input_filename)
+
+        # splitext copes with any extension length; slicing off 4 characters did not.
+        dir_name, file_name = os.path.split(input_filename)
+        stem = os.path.splitext(file_name)[0]
+        output_filename = os.path.join(dir_name, "compressed_" + stem + ".mp3")
+
+        # len(audio) is in milliseconds, so bits / ms is already kbit/s. Clamp to 8-320
+        # (the span of standard MP3 bitrates) to avoid requests like "0k" or "3495k".
+        target_size_bits = target_size_mb * 8 * 1024 * 1024
+        target_bitrate = min(320, max(8, int(target_size_bits / len(audio))))
+
+        compressed_audio = audio.set_frame_rate(24000).set_channels(1).set_sample_width(2)
+        compressed_audio.export(output_filename, format="mp3", bitrate=str(target_bitrate) + "k")
+        print(f"Compressed audio. New file: {output_filename}")
+        return output_filename
+    except Exception as e:
+        print(f"Failed to compress audio: {e}")
+        return None
+
 # Transcribes the audio file using OpenAI's transcription service.
 def transcription(OPENAI_API_KEY, audio_filename):
    try:
@@ -68,7 +94,7 @@ def summary(OPENAI_API_KEY, transcript):
            if chunk.choices[0].delta.content is not None:
                print(f"\033[92m{chunk.choices[0].delta.content}\033[0m", end="", flush=True)

-        print("\nSummarized transcript.")
+        print("Summarized transcript.")
    except Exception as e:
        print(f"\033[91mFailed to summarize transcript: {e}\033[0m")
        return None
@@ -94,10 +120,11 @@ def main():
        if OPENAI_API_KEY is not None:
            audio_filename = download(video_url)
            if audio_filename is not None:
-                transcript = transcription(OPENAI_API_KEY, audio_filename)
+                compressed_audio_filename = compress_audio(audio_filename)
+                transcript = transcription(OPENAI_API_KEY, compressed_audio_filename)
                if transcript is not None:
                    summary(OPENAI_API_KEY, transcript)
-                cleanup(audio_filename)
+                cleanup(compressed_audio_filename)
    except Exception as e:
        print(f"\033[91m{e}\033[0m")
    finally: