batman
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
.env
|
||||||
|
.venv
|
||||||
|
sec_data/
|
||||||
60
main.py
Normal file
60
main.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
import os
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def download_file(url, filename, timeout=(10, 60)):
    """
    Download a file from a URL and save it locally.

    The response body is streamed to disk in 8 KiB chunks, so the whole
    payload is never held in memory at once.

    Args:
        url: Address of the file to fetch.
        filename: Local path the body is written to; an existing file is
            overwritten.
        timeout: Forwarded to ``requests.get``. Either a single number of
            seconds or a ``(connect, read)`` tuple. The read value bounds the
            wait between received bytes, not the total download time.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}
    print(f"Starting download: {filename}")
    # Without a timeout, requests waits indefinitely on a stalled server.
    # The context manager releases the streamed connection even when the
    # status check or the write loop raises.
    with requests.get(
        url, headers=headers, stream=True, timeout=timeout
    ) as response:
        response.raise_for_status()  # Check if the request was successful
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    print(f"Download complete: {filename}")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_zip(source_filename, destination_folder):
    """
    Unpack every member of a ZIP archive into a directory.

    A progress message is printed before and after the extraction.

    Args:
        source_filename: Path of the ZIP archive to read.
        destination_folder: Directory the archive members are written to.
    """
    print(f"Starting extraction: {source_filename} -> {destination_folder}")
    archive = zipfile.ZipFile(source_filename, mode="r")
    # The archive is closed on exit, whether or not the extraction succeeds.
    with archive:
        archive.extractall(path=destination_folder)
    print(f"Extraction complete: {destination_folder}")
|
||||||
|
|
||||||
|
|
||||||
|
# Root folder that holds the downloaded archives and their extracted contents
SEC_DATA_DIR = "sec_data"

# One subfolder per dataset, both located under SEC_DATA_DIR
COMPANYFACTS_DIR = os.path.join(SEC_DATA_DIR, "companyfacts")
SUBMISSIONS_DIR = os.path.join(SEC_DATA_DIR, "submissions")

# Make sure both target folders exist (this also creates SEC_DATA_DIR)
for _folder in (COMPANYFACTS_DIR, SUBMISSIONS_DIR):
    os.makedirs(_folder, exist_ok=True)

# Remote locations of the two archives
COMPANYFACTS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/xbrl/companyfacts.zip"
)
SUBMISSIONS_URL = (
    "https://www.sec.gov/Archives/edgar/daily-index/bulkdata/submissions.zip"
)

# Local paths the archives are saved to
companyfacts_zip = os.path.join(SEC_DATA_DIR, "companyfacts.zip")
submissions_zip = os.path.join(SEC_DATA_DIR, "submissions.zip")

# Fetch both archives first: company facts, then submissions
for _url, _archive in (
    (COMPANYFACTS_URL, companyfacts_zip),
    (SUBMISSIONS_URL, submissions_zip),
):
    download_file(_url, _archive)

# Then unpack each archive into its own subfolder, in the same order
for _archive, _target in (
    (companyfacts_zip, COMPANYFACTS_DIR),
    (submissions_zip, SUBMISSIONS_DIR),
):
    extract_zip(_archive, _target)
|
||||||
Reference in New Issue
Block a user