batman
This commit is contained in:
60
main.py
Normal file
60
main.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import os
|
||||
import zipfile
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def download_file(url, filename):
    """
    Download a file from a URL and save it locally.

    The response body is streamed to disk in 8192-byte chunks, so the
    whole payload is never held in memory at once.

    Args:
        url: Address of the file to fetch.
        filename: Local path the downloaded bytes are written to.

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            times out, or if the server answers with an error status
            (raised by ``raise_for_status``).
        OSError: If the destination file cannot be opened or written.
    """
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}
    print(f"Starting download: {filename}")
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server. The tuple is (connect timeout, read timeout) in seconds.
    # Using the response as a context manager releases the connection even
    # when the status check or the file write raises.
    with requests.get(
        url, headers=headers, stream=True, timeout=(10, 60)
    ) as response:
        response.raise_for_status()  # Check if the request was successful
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(f"Download complete: {filename}")
|
||||
|
||||
|
||||
def extract_zip(source_filename, destination_folder):
    """
    Unpack every member of a ZIP archive into a directory.

    Args:
        source_filename: Path of the ZIP archive to read.
        destination_folder: Directory the archive members are written into.
    """
    print(f"Starting extraction: {source_filename} -> {destination_folder}")
    # The archive handle is closed as soon as all members are written out.
    with zipfile.ZipFile(source_filename, mode="r") as archive:
        archive.extractall(path=destination_folder)
    print(f"Extraction complete: {destination_folder}")
|
||||
|
||||
|
||||
# Root folder holding both the downloaded archives and their extracted contents
SEC_DATA_DIR = "sec_data"

# One extraction subfolder per dataset, both located under SEC_DATA_DIR
COMPANYFACTS_DIR = os.path.join(SEC_DATA_DIR, "companyfacts")
SUBMISSIONS_DIR = os.path.join(SEC_DATA_DIR, "submissions")

# Make sure both extraction folders exist; an already-present folder is fine
for _folder in (COMPANYFACTS_DIR, SUBMISSIONS_DIR):
    os.makedirs(_folder, exist_ok=True)

# Remote locations of the two archives
COMPANYFACTS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/xbrl/companyfacts.zip"
)
SUBMISSIONS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/bulkdata/submissions.zip"
)

# Local paths the archives are saved to
companyfacts_zip = os.path.join(SEC_DATA_DIR, "companyfacts.zip")
submissions_zip = os.path.join(SEC_DATA_DIR, "submissions.zip")

# (url, archive path, extraction folder) per dataset, in processing order
_DATASETS = (
    (COMPANYFACTS_URL, companyfacts_zip, COMPANYFACTS_DIR),
    (SUBMISSIONS_URL, submissions_zip, SUBMISSIONS_DIR),
)

# Fetch every archive first ...
for _url, _archive, _ in _DATASETS:
    download_file(_url, _archive)

# ... and only then unpack each one into its own folder
for _, _archive, _folder in _DATASETS:
    extract_zip(_archive, _folder)
|
||||
Reference in New Issue
Block a user