feat: download and extract the zip.
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -4,4 +4,6 @@ sec_data/
|
|||||||
CIK*
|
CIK*
|
||||||
.vscode
|
.vscode
|
||||||
stockdb.session.sql
|
stockdb.session.sql
|
||||||
temp.json
|
temp.json
|
||||||
|
dataset/
|
||||||
|
2024q2.zip
|
||||||
23
fs_datasets.py
Normal file
23
fs_datasets.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
import requests
|
||||||
|
import zipfile
|
||||||
|
import os
|
||||||
|
|
||||||
|
def download_file(url, filename, timeout=30):
    """Stream the resource at ``url`` into the local file ``filename``.

    The body is written in 8 KiB chunks so the whole payload is never held
    in memory at once.

    Args:
        url: Address to fetch with an HTTP GET.
        filename: Path of the file to create (or overwrite) with the body.
        timeout: Seconds handed to ``requests.get``. It bounds the connection
            attempt and each wait for data from the server, not the total
            download time, so large files are still fine.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server stops responding within ``timeout``.
    """
    # Identifies this client to the server on every request.
    headers = {"User-Agent": "LeonardExcoffier/1.0 (excoffier.leonard@gmail.com)"}
    # With stream=True the connection stays open until the body is consumed
    # or the response is closed; the context manager guarantees the latter
    # even when writing to disk fails part-way through.
    with requests.get(
        url, headers=headers, stream=True, timeout=timeout
    ) as response:
        # Check the status before opening the file so an HTTP error does not
        # leave an empty or bogus file behind.
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
|
||||||
|
|
||||||
|
def extract_zip(source_filename, destination_folder):
    """Unpack every member of a zip archive into a folder.

    Args:
        source_filename: Path of the zip archive to read.
        destination_folder: Directory that receives the extracted members.
    """
    with zipfile.ZipFile(source_filename, mode="r") as archive:
        archive.extractall(path=destination_folder)
|
||||||
|
|
||||||
|
# Local folder for the extracted files, the remote archive to fetch, and the
# local name the archive is saved under.
dataset_dir = "dataset"
dataset_link = "https://www.sec.gov/files/dera/data/financial-statement-data-sets/2024q2.zip"
dataset_zip = "2024q2.zip"

# Create the target folder first (a no-op when it already exists), then
# download the archive and unpack it into that folder.
os.makedirs(dataset_dir, exist_ok=True)
download_file(dataset_link, dataset_zip)
extract_zip(dataset_zip, dataset_dir)
|
||||||
Reference in New Issue
Block a user