From a17d73f336a8ca37fa795c8e9536c9009562904e Mon Sep 17 00:00:00 2001 From: Leonard Excoffier <48970393+excoffierleonard@users.noreply.github.com> Date: Thu, 29 Aug 2024 22:03:05 -0400 Subject: [PATCH] feat: now loops over all the facts jsons --- write_to_db.py | 118 ++++++++++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 50 deletions(-) diff --git a/write_to_db.py b/write_to_db.py index 253c9be..72b3e6f 100644 --- a/write_to_db.py +++ b/write_to_db.py @@ -1,14 +1,14 @@ +import os import mariadb import json from dotenv import load_dotenv -import os # Load environment variables from .env file load_dotenv() def connect_to_db(): try: - # Read the needed variables from the environment + # Read the connection parameters from the environment conn = mariadb.connect( user=os.getenv("DB_USER"), password=os.getenv("DB_PASSWORD"), @@ -38,63 +38,81 @@ def insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed (cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame) ) -def parse_json_and_insert_data(file_path, cursor): +def parse_json_and_insert_data(file_path): with open(file_path, 'r') as file: data = json.load(file) - # Insert the entity - cik = data['cik'] - entity_name = data['entityName'] - insert_entity(cursor, cik, entity_name) + # Get the connection and cursor + conn = connect_to_db() + if conn is None: + return False - # Iterate over facts - for taxonomy, fact_details in data['facts'].items(): - for fact_id, fact in fact_details.items(): - # Get fact details - label = fact.get('label') - description = fact.get('description') + try: + cursor = conn.cursor() - for unit, unit_vals in fact.get('units', {}).items(): - # Insert fact (taxonomy level doesn't seem directly stored in JSON) - insert_fact(cursor, taxonomy, fact_id, label, description, unit) + # Insert the entity + cik = data.get('cik') + entity_name = data.get('entityName') + insert_entity(cursor, cik, entity_name) - # Insert each data point - for entry in unit_vals: - start = entry.get('start', None) - end = entry['end'] - val = entry['val'] - accn = entry['accn'] - fy = entry['fy'] - fp = entry['fp'] - form = entry['form'] - filed = entry['filed'] - frame = entry.get('frame', None) + # Iterate over facts + for taxonomy, fact_details in data['facts'].items(): + for fact_id, fact in fact_details.items(): + # Get fact details + label = fact.get('label') + description = fact.get('description') - insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame) + for unit, unit_vals in fact.get('units', {}).items(): + # Insert fact (taxonomy level doesn't seem directly stored in JSON) + insert_fact(cursor, taxonomy, fact_id, label, description, unit) + + # Insert each data point + for entry in unit_vals: + start = entry.get('start', None) + end = entry['end'] + val = entry['val'] + accn = entry['accn'] + fy = entry['fy'] + fp = entry['fp'] + form = entry['form'] + filed = entry['filed'] + frame = entry.get('frame', None) + + insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame) + + # Commit the transaction for the whole file + conn.commit() + return True + + except Exception as e: + print(f"Error occurred while processing {file_path}: {e}") + conn.rollback() + return False + finally: + cursor.close() + conn.close() + +def process_all_files_in_directory(directory_path): + files = [f for f in os.listdir(directory_path) if f.endswith('.json')] + total_files = len(files) + processed_files = 0 + + for idx, file_name in enumerate(files, start=1): + file_path = os.path.join(directory_path, file_name) + print(f"Processing file {idx} of {total_files}: {file_name}") + + if parse_json_and_insert_data(file_path): + processed_files += 1 + print(f"Successfully processed {file_name}") + else: + print(f"Failed to process {file_name}") + + print(f"Finished processing {processed_files} out of {total_files} files.") def main(): - # Connect to the database - conn = connect_to_db() - if conn is None: - return - - try: - cursor = conn.cursor() - - # Load JSON and insert data - json_file_path = 'CIK0000320193.json' - parse_json_and_insert_data(json_file_path, cursor) - - # Commit the transaction - conn.commit() - - except Exception as e: - print(f"Error occurred: {e}") - conn.rollback() - finally: - cursor.close() - conn.close() - print("Connection closed") + # Process all JSON files in the directory + directory_path = './sec_data/companyfacts/' + process_all_files_in_directory(directory_path) if __name__ == "__main__": main() \ No newline at end of file