"""Flatten an SEC EDGAR companyfacts JSON file and load it into MariaDB.

Reads DB connection settings from a ``.env`` file, flattens every reported
fact (one row per unit/period observation) into a pandas DataFrame, and
writes the result to the ``data`` table, replacing any existing contents.
"""
import json
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.types import Float, Integer, String

# Path of the SEC companyfacts JSON document to ingest.
SOURCE_FILE = 'CIK0001937441.json'

# Column -> SQL type mapping (optional, but recommended for performance).
# NOTE(review): SEC fact descriptions routinely exceed 255 characters —
# confirm String(255) is wide enough for your data, or switch to Text.
DTYPE_MAP = {
    'entity_cik': String(10),
    'entity_name': String(255),
    'fact_id': String(255),
    'fact_taxonomy': String(255),
    'fact_label': String(255),
    'fact_description': String(255),
    'fact_unit': String(50),
    'start': String(10),
    'end': String(10),
    'val': Float,
    'accn': String(50),
    'fy': Integer,
    'fp': String(10),
    'form': String(10),
    'filed': String(10),
    'frame': String(50),
}


def flatten_facts(cik, entity_name, facts):
    """Flatten the nested companyfacts structure into a list of flat dicts.

    Args:
        cik: The entity's CIK identifier (copied onto every row).
        entity_name: The entity's display name (copied onto every row).
        facts: Mapping of taxonomy -> fact_id -> fact data, as found under
            the ``facts`` key of a companyfacts document. May be ``None``.

    Returns:
        One dict per unit/period observation, combining entity/fact
        metadata with the observation fields (``val``, ``accn``, ...).
    """
    rows = []
    # `facts` or any fact's `units` may be absent; `or {}` guards both so a
    # sparse document produces an empty result instead of AttributeError.
    for taxonomy, fact_items in (facts or {}).items():
        for fact_id, fact_data in fact_items.items():
            base = {
                'entity_cik': cik,
                'entity_name': entity_name,
                'fact_id': fact_id,
                'fact_taxonomy': taxonomy,
                'fact_label': fact_data.get('label'),
                'fact_description': fact_data.get('description'),
            }
            for unit, observations in (fact_data.get('units') or {}).items():
                for obs in observations:
                    row = dict(base, fact_unit=unit)
                    # Observation fields (val, accn, fy, fp, form, ...) are
                    # merged last, matching the original column layout.
                    row.update(obs)
                    rows.append(row)
    return rows


def build_engine():
    """Create a SQLAlchemy engine from ``DB_*`` environment variables.

    Raises:
        RuntimeError: If any required variable is unset — instead of
            silently embedding the string ``'None'`` in the URL.
    """
    load_dotenv()
    required = ('DB_USER', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT', 'DB_NAME')
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        raise RuntimeError(
            f"Missing required environment variables: {', '.join(missing)}"
        )
    # URL.create escapes special characters (e.g. '@' or '/' in the
    # password) that would corrupt a hand-built f-string URL.
    url = URL.create(
        'mysql+pymysql',
        username=os.getenv('DB_USER'),
        password=os.getenv('DB_PASSWORD'),
        host=os.getenv('DB_HOST'),
        port=int(os.getenv('DB_PORT')),
        database=os.getenv('DB_NAME'),
    )
    return create_engine(url)


def main():
    """Load, flatten, and write the companyfacts file to the database."""
    with open(SOURCE_FILE, encoding='utf-8') as f:
        data = json.load(f)

    rows = flatten_facts(data.get('cik'), data.get('entityName'), data.get('facts'))
    df = pd.DataFrame(rows)

    engine = build_engine()
    # Write to the 'data' table, creating it if needed. 'replace' drops and
    # recreates the table on every run — intentional full refresh.
    df.to_sql('data', con=engine, if_exists='replace', index=False, dtype=DTYPE_MAP)
    print("Data successfully written to the database.")


if __name__ == '__main__':
    main()