feat: check whether the CIK is already present before writing to the database; this check can be disabled when you want to update existing data.

This commit is contained in:
Leonard Excoffier
2024-08-29 22:04:47 -04:00
parent a17d73f336
commit 0f9a42eb7e

View File

@@ -38,20 +38,31 @@ def insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed
(cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame) (cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame)
) )
def cik_exists(cursor, cik):
    """Report whether the ``entities`` table already holds a row for ``cik``.

    Args:
        cursor: Database cursor exposing ``execute()`` and ``fetchone()``;
            the query uses a ``?`` placeholder for the bound value.
        cik: Value compared against the ``cik`` column of ``entities``.

    Returns:
        ``True`` if the lookup yields at least one row, ``False`` otherwise.
    """
    # The value is bound as a parameter rather than interpolated into the SQL.
    cursor.execute("SELECT 1 FROM entities WHERE cik = ?", (cik,))
    matching_row = cursor.fetchone()
    if matching_row is None:
        return False
    return True
def parse_json_and_insert_data(file_path): def parse_json_and_insert_data(file_path):
with open(file_path, 'r') as file: with open(file_path, 'r') as file:
data = json.load(file) data = json.load(file)
# Get the connection and cursor cik = data.get('cik')
# Start a new connection for each file
conn = connect_to_db() conn = connect_to_db()
if conn is None: if conn is None:
return False return False
try: try:
cursor = conn.cursor() cursor = conn.cursor()
# Optional: Check if cik already exists in the database.
# You can comment this block out if you do not want this check.
if cik_exists(cursor, cik):
print(f"CIK {cik} already exists in the database. Skipping file {file_path}.")
return False
# Insert the entity # Insert the entity
cik = data.get('cik')
entity_name = data.get('entityName') entity_name = data.get('entityName')
insert_entity(cursor, cik, entity_name) insert_entity(cursor, cik, entity_name)
@@ -63,7 +74,7 @@ def parse_json_and_insert_data(file_path):
description = fact.get('description') description = fact.get('description')
for unit, unit_vals in fact.get('units', {}).items(): for unit, unit_vals in fact.get('units', {}).items():
# Insert fact (taxonomy level doesn't seem directly stored in JSON) # Insert fact
insert_fact(cursor, taxonomy, fact_id, label, description, unit) insert_fact(cursor, taxonomy, fact_id, label, description, unit)
# Insert each data point # Insert each data point
@@ -80,7 +91,7 @@ def parse_json_and_insert_data(file_path):
insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame) insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame)
# Commit the transaction for the whole file # Commit transaction
conn.commit() conn.commit()
return True return True