23 lines
832 B
Python
23 lines
832 B
Python
import requests
|
|
import zipfile
|
|
import os
|
|
|
|
def download_file(url, filename, timeout=(10, 60)):
    """Stream the resource at *url* into the local file *filename*.

    Args:
        url: Address fetched with an HTTP GET request.
        filename: Path of the file to write. It is opened in binary write
            mode, so any existing content is replaced.
        timeout: Forwarded to ``requests.get``. Either a single number or a
            ``(connect, read)`` tuple of seconds. It bounds how long to wait
            for the server to respond, not the total download time.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.exceptions.Timeout: If the server does not respond within
            *timeout*.
    """
    # Identify this client to the server.
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}

    # With stream=True the connection stays open until the body is consumed
    # or the response is closed; the context manager guarantees the close
    # even when the status check or the file write raises.
    with requests.get(
        url, headers=headers, stream=True, timeout=timeout
    ) as response:
        # Check the status before opening the file so a failed request does
        # not truncate an existing download.
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
|
|
|
|
def extract_zip(source_filename, destination_folder):
    """Unpack every member of a ZIP archive into a folder.

    Args:
        source_filename: Path of the ZIP archive to read.
        destination_folder: Directory that receives the extracted members.
    """
    with zipfile.ZipFile(file=source_filename, mode="r") as archive:
        archive.extractall(path=destination_folder)
|
|
|
|
# Source URL, local archive name, and extraction folder for the data set.
dataset_link = "https://www.sec.gov/files/dera/data/financial-statement-data-sets/2024q2.zip"
dataset_zip = "2024q2.zip"
dataset_dir = "dataset"

# Make sure the output folder exists, fetch the archive, then unpack it.
os.makedirs(name=dataset_dir, exist_ok=True)
download_file(url=dataset_link, filename=dataset_zip)
extract_zip(source_filename=dataset_zip, destination_folder=dataset_dir)