feat: script to write info to the DB seems to work (though I'm not yet sure how); it still needs to be generalized with a for loop over all files

This commit is contained in:
Leonard Excoffier
2024-08-29 21:51:05 -04:00
parent 750d33b976
commit 5946ff73bd

View File

@@ -1,24 +1,100 @@
import mariadb
import json
from dotenv import load_dotenv
import os
# Load environment variables from the .env file so the DB_* settings read by
# connect_to_db() are available through os.getenv. No data file is opened
# here: JSON input is read inside parse_json_and_insert_data().
load_dotenv()
def connect_to_db():
    """Open a MariaDB connection configured from environment variables.

    Reads DB_USER, DB_PASSWORD, DB_HOST, DB_PORT and DB_NAME with os.getenv.
    When DB_PORT is unset or empty, the standard MariaDB port 3306 is used.

    Returns:
        The connection returned by mariadb.connect, or None when DB_PORT is
        not a valid integer or the connection attempt raises mariadb.Error.
        The failure reason is printed in both cases.
    """
    raw_port = os.getenv("DB_PORT")
    try:
        # int(None) would raise TypeError, so only convert a non-empty value.
        port = int(raw_port) if raw_port else 3306
    except ValueError:
        print(f"Error connecting to MariaDB: invalid DB_PORT {raw_port!r}")
        return None

    try:
        # Read the remaining settings from the environment
        conn = mariadb.connect(
            user=os.getenv("DB_USER"),
            password=os.getenv("DB_PASSWORD"),
            host=os.getenv("DB_HOST"),
            port=port,
            database=os.getenv("DB_NAME"),
        )
    except mariadb.Error as e:
        print(f"Error connecting to MariaDB: {e}")
        return None
    return conn
def insert_entity(cursor, cik, entity_name):
    """Insert one row into the ``entities`` table.

    Args:
        cursor: Object exposing ``execute(sql, params)``.
        cik: Value bound to the ``cik`` column.
        entity_name: Value bound to the ``name`` column.

    The statement is ``INSERT IGNORE``; presumably a row whose key already
    exists is skipped instead of raising (depends on the table's keys, which
    are not visible here).
    """
    statement = "INSERT IGNORE INTO entities (cik, name) VALUES (?, ?)"
    row = (cik, entity_name)
    cursor.execute(statement, row)
def insert_fact(cursor, taxonomy, fact_id, label, description, unit):
    """Insert one row into the ``facts`` table.

    Args:
        cursor: Object exposing ``execute(sql, params)``.
        taxonomy: Value bound to the ``taxonomy`` column.
        fact_id: Value bound to the ``id`` column.
        label: Value bound to the ``label`` column.
        description: Value bound to the ``description`` column.
        unit: Value bound to the ``unit`` column.
    """
    statement = (
        "INSERT IGNORE INTO facts (id, taxonomy, label, description, unit) "
        "VALUES (?, ?, ?, ?, ?)"
    )
    # Column order puts id first, so fact_id precedes taxonomy here even
    # though the function signature lists taxonomy first.
    row = (fact_id, taxonomy, label, description, unit)
    cursor.execute(statement, row)
def insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame):
    """Insert one row into the ``data`` table.

    Args:
        cursor: Object exposing ``execute(sql, params)``.
        cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame:
            Values bound, in this order, to the identically named columns.
    """
    statement = (
        "INSERT IGNORE INTO data (cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame)\n"
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )
    row = (cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame)
    cursor.execute(statement, row)
def parse_json_and_insert_data(file_path, cursor):
    """Load one JSON file and insert its entity, facts and data points.

    The file is expected to hold top-level ``cik`` and ``entityName`` keys and
    a ``facts`` mapping of taxonomy -> fact id -> fact, where each fact may
    carry ``label``, ``description`` and a ``units`` mapping of unit -> list of
    entries.

    Args:
        file_path: Path of the JSON file to read.
        cursor: Database cursor passed through to the insert helpers.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``cik``/``entityName`` or a required entry field
            (``end``, ``val``, ``accn``, ``fy``, ``fp``, ``form``, ``filed``)
            is missing.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Insert the entity
    cik = data['cik']
    entity_name = data['entityName']
    insert_entity(cursor, cik, entity_name)

    # Iterate over facts; the outer key is the taxonomy name. A file without
    # a 'facts' section contributes only its entity row.
    for taxonomy, fact_details in data.get('facts', {}).items():
        for fact_id, fact in fact_details.items():
            label = fact.get('label')
            description = fact.get('description')

            for unit, unit_vals in fact.get('units', {}).items():
                # One facts row is attempted per (fact, unit) pair.
                insert_fact(cursor, taxonomy, fact_id, label, description, unit)

                # Insert each data point; 'start' and 'frame' are optional
                # and are stored as None when absent.
                for entry in unit_vals:
                    insert_data(
                        cursor,
                        cik,
                        fact_id,
                        entry.get('start'),
                        entry['end'],
                        entry['val'],
                        entry['accn'],
                        entry['fy'],
                        entry['fp'],
                        entry['form'],
                        entry['filed'],
                        entry.get('frame'),
                    )
def main(json_file_paths=('CIK0000320193.json',)):
    """Load JSON files into the database inside a single transaction.

    Args:
        json_file_paths: Iterable of JSON file paths to load. Defaults to the
            single file 'CIK0000320193.json'.

    Connects with connect_to_db() and returns immediately if that fails.
    Otherwise every file is parsed and inserted, then the transaction is
    committed. Any exception is printed and the transaction is rolled back.
    The cursor and connection are always closed.
    """
    # Connect to the database
    conn = connect_to_db()
    if conn is None:
        return

    # Bound before the try so the finally clause is safe even when
    # conn.cursor() itself raises.
    cursor = None
    try:
        cursor = conn.cursor()
        # Load JSON and insert data
        for json_file_path in json_file_paths:
            parse_json_and_insert_data(json_file_path, cursor)
        # Commit the transaction
        conn.commit()
    except Exception as e:
        # Top-level boundary of the script: report and undo partial work.
        print(f"Error occurred: {e}")
        conn.rollback()
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()
        print("Connection closed")
# Run the loader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()