This commit is contained in:
Leonard Excoffier
2024-08-25 19:52:34 -04:00
commit 20a2866ecd
2 changed files with 63 additions and 0 deletions

60
main.py Normal file
View File

@@ -0,0 +1,60 @@
import os
import zipfile
import requests
def download_file(url, filename, timeout=(10, 60)):
    """
    Download a file from a URL and save it locally.

    The response body is streamed to disk in 8 KiB chunks, so the whole
    payload is never held in memory at once.

    Args:
        url: Address of the file to fetch.
        filename: Local path the body is written to; an existing file at
            that path is overwritten.
        timeout: Forwarded to ``requests.get``. Either a single number of
            seconds or a ``(connect, read)`` tuple. Pass ``None`` to wait
            indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the destination file cannot be opened or written.
    """
    # Identify the client to the server, including a contact address.
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}
    print(f"Starting download: {filename}")
    # With stream=True the connection stays open until the body is consumed
    # or the response is closed; the context manager guarantees the latter
    # even when raise_for_status() or a write fails part-way through.
    with requests.get(
        url, headers=headers, stream=True, timeout=timeout
    ) as response:
        response.raise_for_status()  # Abort before touching the disk on HTTP errors
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(f"Download complete: {filename}")
def extract_zip(source_filename, destination_folder):
    """
    Unpack every member of a ZIP archive into a folder.

    Args:
        source_filename: Path of the ZIP archive to read.
        destination_folder: Folder the archive members are extracted into.

    A progress line is printed before and after the extraction.
    """
    start_message = f"Starting extraction: {source_filename} -> {destination_folder}"
    print(start_message)

    archive = zipfile.ZipFile(source_filename, "r")
    with archive:
        archive.extractall(path=destination_folder)

    done_message = f"Extraction complete: {destination_folder}"
    print(done_message)
# Root folder that receives both the downloaded archives and their contents
SEC_DATA_DIR = "sec_data"

# Remote locations of the two ZIP archives
COMPANYFACTS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/xbrl/companyfacts.zip"
)
SUBMISSIONS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/bulkdata/submissions.zip"
)

# One extraction subfolder per archive, both inside SEC_DATA_DIR
COMPANYFACTS_DIR = os.path.join(SEC_DATA_DIR, "companyfacts")
SUBMISSIONS_DIR = os.path.join(SEC_DATA_DIR, "submissions")

# Local paths the ZIP archives are saved under
companyfacts_zip = os.path.join(SEC_DATA_DIR, "companyfacts.zip")
submissions_zip = os.path.join(SEC_DATA_DIR, "submissions.zip")

# Make sure the extraction folders (and therefore SEC_DATA_DIR) exist
for _target_dir in (COMPANYFACTS_DIR, SUBMISSIONS_DIR):
    os.makedirs(_target_dir, exist_ok=True)

# Fetch both archives first ...
for _url, _zip_path in (
    (COMPANYFACTS_URL, companyfacts_zip),
    (SUBMISSIONS_URL, submissions_zip),
):
    download_file(_url, _zip_path)

# ... then unpack each one into its own folder
for _zip_path, _target_dir in (
    (companyfacts_zip, COMPANYFACTS_DIR),
    (submissions_zip, SUBMISSIONS_DIR),
):
    extract_zip(_zip_path, _target_dir)