From 0f9a42eb7e812d88d719fc60306debc613c198e4 Mon Sep 17 00:00:00 2001 From: Leonard Excoffier <48970393+excoffierleonard@users.noreply.github.com> Date: Thu, 29 Aug 2024 22:04:47 -0400 Subject: [PATCH] feat: now checks if cik is already present before writing to db, can be deactivated if want to update data. --- write_to_db.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/write_to_db.py b/write_to_db.py index 72b3e6f..9c84bb4 100644 --- a/write_to_db.py +++ b/write_to_db.py @@ -38,20 +38,31 @@ def insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed (cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame) ) +def cik_exists(cursor, cik): + cursor.execute("SELECT 1 FROM entities WHERE cik = ?", (cik,)) + return cursor.fetchone() is not None + def parse_json_and_insert_data(file_path): with open(file_path, 'r') as file: data = json.load(file) - # Get the connection and cursor + cik = data.get('cik') + + # Start a new connection for each file conn = connect_to_db() if conn is None: return False - + try: cursor = conn.cursor() + # Optional: Check if cik already exists in the database. + # You can comment this block out if you do not want this check. + if cik_exists(cursor, cik): + print(f"CIK {cik} already exists in the database. Skipping file {file_path}.") + return False + # Insert the entity - cik = data.get('cik') entity_name = data.get('entityName') insert_entity(cursor, cik, entity_name) @@ -63,7 +74,7 @@ def parse_json_and_insert_data(file_path): description = fact.get('description') for unit, unit_vals in fact.get('units', {}).items(): - # Insert fact (taxonomy level doesn't seem directly stored in JSON) + # Insert fact insert_fact(cursor, taxonomy, fact_id, label, description, unit) # Insert each data point @@ -80,7 +91,7 @@ def parse_json_and_insert_data(file_path): insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame) - # Commit the transaction for the whole file + # Commit transaction conn.commit() return True