import os
import zipfile

import requests


def download_file(url, filename, timeout=(10, 60)):
    """
    Download a file from a URL and save it locally.

    The response body is streamed to ``<filename>.part`` and only moved to
    ``filename`` once the transfer has finished, so an interrupted download
    never leaves a truncated file under the final name.

    Args:
        url: Address of the file to fetch.
        filename: Local path where the downloaded file is stored.
        timeout: Forwarded to ``requests.get``; a number of seconds or a
            ``(connect, read)`` tuple. With a streamed response the read
            value limits the wait for data on the socket, not the duration
            of the whole download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: If the connection fails or times out.
        OSError: If the local file cannot be written or renamed.
    """
    # Identifies this client to the server.
    # NOTE(review): presumably required by SEC's fair-access policy — confirm.
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}
    partial_path = f"{filename}.part"

    print(f"Starting download: {filename}")
    try:
        # The context manager releases the streamed connection even when
        # raise_for_status() or a write fails.
        with requests.get(
            url, headers=headers, stream=True, timeout=timeout
        ) as response:
            response.raise_for_status()  # Check if the request was successful
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
        # Publish the file under its final name only after a full transfer.
        os.replace(partial_path, filename)
    finally:
        # After a successful os.replace the partial file no longer exists;
        # after a failure, remove the incomplete leftover.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"Download complete: {filename}")


def extract_zip(source_filename, destination_folder):
    """
    Extract a ZIP file into a target folder.

    Args:
        source_filename: Path of the ZIP archive to read.
        destination_folder: Directory that receives the archive's contents.

    Raises:
        zipfile.BadZipFile: If ``source_filename`` is not a valid ZIP archive.
        OSError: If the archive cannot be read or the members cannot be
            written.
    """
    print(f"Starting extraction: {source_filename} -> {destination_folder}")
    with zipfile.ZipFile(source_filename, "r") as zip_ref:
        zip_ref.extractall(destination_folder)
    print(f"Extraction complete: {destination_folder}")


# Directory where files will be downloaded and extracted
SEC_DATA_DIR = "sec_data"

# Define the subfolders inside sec_data
COMPANYFACTS_DIR = os.path.join(SEC_DATA_DIR, "companyfacts")
SUBMISSIONS_DIR = os.path.join(SEC_DATA_DIR, "submissions")

# NOTE(review): everything below runs at import time (directory creation,
# downloads, extraction). It is kept at module level to preserve the script's
# existing behavior; consider moving it behind an
# `if __name__ == "__main__":` guard if this module is ever imported.

# Create the directories if they don't exist
# (this also creates the parent SEC_DATA_DIR that holds the zip files)
os.makedirs(COMPANYFACTS_DIR, exist_ok=True)
os.makedirs(SUBMISSIONS_DIR, exist_ok=True)

# File URLs
COMPANYFACTS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/xbrl/companyfacts.zip"
)
SUBMISSIONS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/bulkdata/submissions.zip"
)

# File paths to save the zip files
companyfacts_zip = os.path.join(SEC_DATA_DIR, "companyfacts.zip")
submissions_zip = os.path.join(SEC_DATA_DIR, "submissions.zip")

# Download the files
download_file(COMPANYFACTS_URL, companyfacts_zip)
download_file(SUBMISSIONS_URL, submissions_zip)

# Extract the files into respective directories
extract_zip(companyfacts_zip, COMPANYFACTS_DIR)
extract_zip(submissions_zip, SUBMISSIONS_DIR)