"""Download companyfacts.zip and submissions.zip from www.sec.gov and unpack them.

Running this file as a script creates ``sec_data/companyfacts`` and
``sec_data/submissions``, downloads both archives into ``sec_data/`` and
extracts each one into its own subfolder.
"""

# Provenance: recovered from the patch text of commit
# 20a2866ecd4016e0311023908519a6f4766f9e59 ("batman"), authored by
# Leonard Excoffier on Sun Aug 25 19:52:34 2024 -0400. The same commit added
# a .gitignore containing the entries: .env, .venv, sec_data/

import os
import zipfile

import requests

# Directory where files will be downloaded and extracted
SEC_DATA_DIR = "sec_data"

# Define the subfolders inside sec_data
COMPANYFACTS_DIR = os.path.join(SEC_DATA_DIR, "companyfacts")
SUBMISSIONS_DIR = os.path.join(SEC_DATA_DIR, "submissions")

# File URLs
COMPANYFACTS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/xbrl/companyfacts.zip"
)
SUBMISSIONS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/bulkdata/submissions.zip"
)

# File paths to save the zip files
companyfacts_zip = os.path.join(SEC_DATA_DIR, "companyfacts.zip")
submissions_zip = os.path.join(SEC_DATA_DIR, "submissions.zip")


def download_file(url, filename, timeout=30):
    """
    Download a file from a URL and save it locally.

    The body is streamed to disk in 8 KiB chunks, so the whole payload is
    never held in memory at once.

    Args:
        url: Address of the file to fetch.
        filename: Local path to write to; an existing file is overwritten.
        timeout: Value forwarded to ``requests.get(timeout=...)``. Without
            it a server that stops responding would block the call forever.

    Raises:
        requests.HTTPError: If the server replies with an error status. The
            local file is not opened in that case.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}
    print(f"Starting download: {filename}")
    # With stream=True the connection is held open until the body is consumed
    # or the response is closed; the context manager guarantees the latter
    # even when writing to disk fails part-way through.
    with requests.get(
        url, headers=headers, stream=True, timeout=timeout
    ) as response:
        response.raise_for_status()  # Check if the request was successful
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(f"Download complete: {filename}")


def extract_zip(source_filename, destination_folder):
    """
    Extract a ZIP file into a target folder.

    Args:
        source_filename: Path of the ZIP archive to read.
        destination_folder: Folder that receives every member of the archive.

    Raises:
        zipfile.BadZipFile: If ``source_filename`` is not a valid ZIP archive.
        FileNotFoundError: If ``source_filename`` does not exist.
    """
    print(f"Starting extraction: {source_filename} -> {destination_folder}")
    with zipfile.ZipFile(source_filename, "r") as zip_ref:
        zip_ref.extractall(destination_folder)
    print(f"Extraction complete: {destination_folder}")


def main():
    """Create the target folders, download both archives and extract them."""
    # Create the directories if they don't exist
    os.makedirs(COMPANYFACTS_DIR, exist_ok=True)
    os.makedirs(SUBMISSIONS_DIR, exist_ok=True)

    # Download the files
    download_file(COMPANYFACTS_URL, companyfacts_zip)
    download_file(SUBMISSIONS_URL, submissions_zip)

    # Extract the files into respective directories
    extract_zip(companyfacts_zip, COMPANYFACTS_DIR)
    extract_zip(submissions_zip, SUBMISSIONS_DIR)


# Guarded so that importing this module (e.g. to reuse the helpers) does not
# create directories or start multi-file downloads as a side effect.
if __name__ == "__main__":
    main()