feat: writing the DataFrame to a database now appears to work.

This commit is contained in:
Leonard Excoffier
2024-08-31 00:17:37 -04:00
parent 0fbbb37ef7
commit 765272f896
2 changed files with 49 additions and 14 deletions

View File

@@ -1,26 +1,40 @@
import json
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.types import Float, Integer, String, Text
# Step 1: Load the JSON data
with open('CIK0000320193.json') as f:
# Load environment variables from the .env file into the process environment.
load_dotenv()

# Database connection settings, read from the environment.
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')

# Fail fast with a readable message: an unset variable would otherwise be
# formatted into the URL as the literal text "None".
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ('DB_USER', DB_USER),
        ('DB_HOST', DB_HOST),
        ('DB_NAME', DB_NAME),
    )
    if not setting_value
]
if _missing_settings:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_settings)}"
    )

# Build the URL from its components rather than by string interpolation so
# that special characters in the credentials (e.g. '@', '/', ':') are escaped
# correctly. DB_PORT is optional; when unset the driver's default port is used.
db_connection_url = URL.create(
    drivername='mysql+pymysql',
    username=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=int(DB_PORT) if DB_PORT else None,
    database=DB_NAME,
)

# String form kept under its original name for any code that still reads it.
db_connection_str = db_connection_url.render_as_string(hide_password=False)
engine = create_engine(db_connection_url)
# Load the input JSON file. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's locale encoding.
with open('CIK0001937441.json', encoding='utf-8') as f:
    data = json.load(f)

# Step 2: Extract the relevant fields
cik = data.get('cik')
entity_name = data.get('entityName')
facts = data.get('facts')

# Stop here, before anything touches the database, if the file has no
# 'facts' section; the traversal below needs a dictionary.
if facts is None:
    raise ValueError("Input JSON has no 'facts' section; nothing to load.")

# Prepare a list to hold the rows
rows = []
# Traverse through the facts dictionary
for taxonomy, fact_items in facts.items():
for fact_id, fact_data in fact_items.items():
label = fact_data.get('label')
description = fact_data.get('description')
units = fact_data.get('units')
# For each unit and its details, add a new row
for unit, details_list in units.items():
for details in details_list:
row = {
@@ -32,15 +46,33 @@ for taxonomy, fact_items in facts.items():
'fact_description': description,
'fact_unit': unit
}
# Include the additional details in the row
row.update(details)
# Append the row to the rows list
rows.append(row)
# Turn the collected row dictionaries into a table, one row per dictionary.
df = pd.DataFrame(data=rows)

# Show the first five rows as a quick sanity check.
print(df.head(5))
# Explicit SQL column types for the DataFrame columns, so the table schema
# does not depend on pandas' type inference.
dtype_map = {
    'entity_cik': String(10),
    'entity_name': String(255),
    'fact_id': String(255),
    'fact_taxonomy': String(255),
    'fact_label': String(255),
    # Descriptions are free text and can exceed 255 characters; a bounded
    # VARCHAR would truncate them or make the insert fail.
    'fact_description': Text,
    'fact_unit': String(50),
    'start': String(10),
    'end': String(10),
    # A bare FLOAT is single precision on MySQL/MariaDB and loses digits on
    # large values; precision 53 makes the server use double precision.
    'val': Float(53),
    'accn': String(50),
    'fy': Integer,
    'fp': String(10),
    'form': String(10),
    'filed': String(10),
    'frame': String(50),
}

# Write the DataFrame to the 'data' table. NOTE: if_exists='replace' drops
# and recreates the table on every run, so rows from earlier runs are lost.
df.to_sql('data', con=engine, if_exists='replace', index=False, dtype=dtype_map)
print("Data successfully written to the database.")