"""Download and unpack an SEC financial-statement data set.

Executing this module fetches the ``2024q2.zip`` archive from sec.gov and
extracts its contents into the ``dataset/`` directory.
"""
# The change that introduced this module also added ``dataset/`` and
# ``2024q2.zip`` to .gitignore, so the downloaded archive and the extracted
# files stay out of version control.

import os
import zipfile

import requests


def download_file(url, filename, timeout=30):
    """Stream the resource at *url* into the local file *filename*.

    Args:
        url: Address of the resource to fetch.
        filename: Path of the file to create (an existing file is
            overwritten).
        timeout: Value passed to ``requests.get`` as its ``timeout``; without
            one, a stalled server would block the call indefinitely.

    Raises:
        requests.exceptions.HTTPError: The server answered with an error
            status (raised by ``raise_for_status``).
        requests.exceptions.Timeout: The server did not respond within
            *timeout*.
        OSError: *filename* could not be opened for writing.
    """
    # Identify the client to the server on every request.
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}
    # With stream=True the connection is held until the body is consumed or
    # the response is closed; the context manager releases it on error too.
    with requests.get(
        url, headers=headers, stream=True, timeout=timeout
    ) as response:
        # Fail before creating the file so an error page is never saved.
        response.raise_for_status()
        with open(filename, "wb") as file:
            # Write in 8 KiB chunks so the archive is never held in memory
            # as a whole.
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)


def extract_zip(source_filename, destination_folder):
    """Extract every member of a zip archive into *destination_folder*.

    Args:
        source_filename: Path of the zip archive to read.
        destination_folder: Directory that receives the extracted files.

    Raises:
        zipfile.BadZipFile: *source_filename* is not a valid zip archive.
    """
    with zipfile.ZipFile(source_filename, "r") as zip_ref:
        zip_ref.extractall(destination_folder)


dataset_dir = "dataset"
dataset_link = "https://www.sec.gov/files/dera/data/financial-statement-data-sets/2024q2.zip"
dataset_zip = "2024q2.zip"

# NOTE(review): the steps below run whenever the module is executed or
# imported; consider an ``if __name__ == "__main__":`` guard once it is
# confirmed that nothing relies on the import-time download.
os.makedirs(dataset_dir, exist_ok=True)
download_file(dataset_link, dataset_zip)
extract_zip(dataset_zip, dataset_dir)