feat: now loops over all the facts jsons

This commit is contained in:
Leonard Excoffier
2024-08-29 22:03:05 -04:00
parent 5946ff73bd
commit a17d73f336

View File

@@ -1,14 +1,14 @@
import os
import mariadb import mariadb
import json import json
from dotenv import load_dotenv from dotenv import load_dotenv
import os
# Load environment variables from .env file # Load environment variables from .env file
load_dotenv() load_dotenv()
def connect_to_db(): def connect_to_db():
try: try:
# Read the needed variables from the environment # Read the connection parameters from the environment
conn = mariadb.connect( conn = mariadb.connect(
user=os.getenv("DB_USER"), user=os.getenv("DB_USER"),
password=os.getenv("DB_PASSWORD"), password=os.getenv("DB_PASSWORD"),
@@ -38,63 +38,81 @@ def insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed
(cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame) (cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame)
) )
def parse_json_and_insert_data(file_path):
    """Load one company-facts JSON file and insert its contents in one transaction.

    The file is read and parsed first. A connection is then obtained from
    ``connect_to_db()`` and the entity, every (fact, unit) pair and every data
    point are written through ``insert_entity``, ``insert_fact`` and
    ``insert_data``. Everything is committed once at the end, or rolled back
    if any step fails, so a file is never left half-imported.

    Args:
        file_path: Path of the JSON file to import.

    Returns:
        True if the file was parsed and its data committed; False if the file
        could not be read or parsed, no connection was obtained, or any
        insert failed.
    """
    # Read the file before touching the database so an unreadable or
    # malformed file is reported as a failure without opening a connection.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f"Error occurred while processing {file_path}: {e}")
        return False

    # Get the connection and cursor
    conn = connect_to_db()
    if conn is None:
        return False

    # Bound before the try so the finally clause never hits an unbound name
    # when conn.cursor() itself raises.
    cursor = None
    try:
        cursor = conn.cursor()

        # Insert the entity
        cik = data.get('cik')
        entity_name = data.get('entityName')
        insert_entity(cursor, cik, entity_name)

        # Iterate over facts: the top-level keys of data['facts'] are passed
        # on as the taxonomy of each fact below them.
        for taxonomy, fact_details in data['facts'].items():
            for fact_id, fact in fact_details.items():
                label = fact.get('label')
                description = fact.get('description')

                for unit, unit_vals in fact.get('units', {}).items():
                    # One fact row per (fact, unit) pair.
                    insert_fact(cursor, taxonomy, fact_id, label, description, unit)

                    # Insert each data point. 'start' and 'frame' are
                    # optional; a missing required key raises KeyError and
                    # rolls the whole file back.
                    for entry in unit_vals:
                        insert_data(
                            cursor,
                            cik,
                            fact_id,
                            entry.get('start', None),
                            entry['end'],
                            entry['val'],
                            entry['accn'],
                            entry['fy'],
                            entry['fp'],
                            entry['form'],
                            entry['filed'],
                            entry.get('frame', None),
                        )

        # Commit the transaction for the whole file
        conn.commit()
        return True

    except Exception as e:
        # Per-file boundary: report, undo the partial work and let the caller
        # move on to the next file.
        print(f"Error occurred while processing {file_path}: {e}")
        conn.rollback()
        return False

    finally:
        if cursor is not None:
            cursor.close()
        conn.close()
def process_all_files_in_directory(directory_path):
    """Import every ``.json`` file found directly inside *directory_path*.

    Each matching file is handed to ``parse_json_and_insert_data``; progress
    and the per-file outcome are printed, followed by a final summary line.

    Args:
        directory_path: Directory to scan (not recursive).

    Returns:
        The number of files that were processed successfully.
    """
    # os.listdir returns entries in arbitrary order; sort so runs are
    # reproducible, and keep only regular files so a directory that happens
    # to be named "*.json" is not passed to the parser.
    files = sorted(
        name
        for name in os.listdir(directory_path)
        if name.endswith('.json')
        and os.path.isfile(os.path.join(directory_path, name))
    )
    total_files = len(files)
    processed_files = 0

    for idx, file_name in enumerate(files, start=1):
        file_path = os.path.join(directory_path, file_name)
        print(f"Processing file {idx} of {total_files}: {file_name}")

        if parse_json_and_insert_data(file_path):
            processed_files += 1
            print(f"Successfully processed {file_name}")
        else:
            print(f"Failed to process {file_name}")

    print(f"Finished processing {processed_files} out of {total_files} files.")
    return processed_files
def main():
    """Script entry point: import every JSON file in the company-facts directory."""
    process_all_files_in_directory('./sec_data/companyfacts/')
# Run the import only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()