feat: download and extract the zip.

This commit is contained in:
Leonard Excoffier
2024-09-10 21:38:43 -04:00
parent 5e38f46e8f
commit e586c2b202
2 changed files with 26 additions and 1 deletions

23
fs_datasets.py Normal file
View File

@@ -0,0 +1,23 @@
import requests
import zipfile
import os
def download_file(url: str, filename: str, timeout: float = 30.0) -> None:
    """Stream the resource at *url* into the local file *filename*.

    The body is written in 8 KiB chunks so the whole payload is never held
    in memory at once. An existing file at *filename* is overwritten.

    Args:
        url: Address of the resource to fetch.
        filename: Path of the local file to create.
        timeout: Seconds to wait for the server to connect or to send more
            data before giving up. Without it a stalled server would block
            the call indefinitely.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    # NOTE(review): presumably sent because sec.gov expects clients to
    # identify themselves with a descriptive User-Agent -- confirm.
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}
    # Using the response as a context manager releases the streamed
    # connection even when the status check or the file write fails.
    with requests.get(
        url, headers=headers, stream=True, timeout=timeout
    ) as response:
        # Fail before touching the disk so an error page is never saved
        # under the target name.
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
def extract_zip(source_filename, destination_folder):
    """Unpack every member of a zip archive into a folder.

    Args:
        source_filename: Path of the zip archive to read.
        destination_folder: Directory that receives the extracted members;
            ``extractall`` creates it if it does not exist yet.

    Raises:
        zipfile.BadZipFile: If *source_filename* is not a valid zip archive.
        FileNotFoundError: If *source_filename* does not exist.
    """
    # The context manager closes the archive handle even if extraction fails.
    with zipfile.ZipFile(source_filename, "r") as archive:
        archive.extractall(destination_folder)
# Remote archive to fetch, the local name it is saved under, and the
# directory its contents are unpacked into.
dataset_link = "https://www.sec.gov/files/dera/data/financial-statement-data-sets/2024q2.zip"
dataset_zip = "2024q2.zip"
dataset_dir = "dataset"

# Prepare the target directory, download the archive, then unpack it.
os.makedirs(name=dataset_dir, exist_ok=True)
download_file(url=dataset_link, filename=dataset_zip)
extract_zip(source_filename=dataset_zip, destination_folder=dataset_dir)