78 lines
2.3 KiB
Python
78 lines
2.3 KiB
Python
import json
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.types import Float, Integer, String, Text
|
|
|
|
# Load environment variables from .env file
load_dotenv()

# Database connection settings, read from the environment (including any
# values supplied by the .env file loaded above).
DB_USER = os.getenv('DB_USER')
DB_PASSWORD = os.getenv('DB_PASSWORD')
DB_HOST = os.getenv('DB_HOST')
DB_PORT = os.getenv('DB_PORT')
DB_NAME = os.getenv('DB_NAME')

# Fail fast with a readable message: an unset variable would otherwise be
# formatted into the URL as the literal text "None" and only surface later
# as an obscure parsing or connection error.
_required_settings = {
    'DB_USER': DB_USER,
    'DB_PASSWORD': DB_PASSWORD,
    'DB_HOST': DB_HOST,
    'DB_PORT': DB_PORT,
    'DB_NAME': DB_NAME,
}
_missing_settings = [
    name for name, value in _required_settings.items() if value is None
]
if _missing_settings:
    raise RuntimeError(
        'Missing required database setting(s): ' + ', '.join(_missing_settings)
    )

# Create the connection string.
# User name and password are percent-encoded so that characters with a
# special meaning inside a URL ('@', ':', '/', '%', ...) cannot corrupt it.
# quote(..., safe='') is used rather than quote_plus so that a space becomes
# '%20', which decodes correctly whether the parser treats '+' as a space
# or as a literal plus sign.
db_connection_str = (
    f"mysql+pymysql://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)
engine = create_engine(db_connection_str)
|
|
|
|
# Load the JSON input file into a plain Python structure.
# The encoding is given explicitly because JSON text is UTF-8, while open()
# would otherwise fall back to the platform's locale encoding.
with open('CIK0001937441.json', encoding='utf-8') as f:
    data = json.load(f)
|
|
|
|
# Flatten the nested document into one row per reported value.
# Layout walked below: facts -> taxonomy -> fact id -> units -> unit -> list
# of detail dicts. (Presumably an SEC EDGAR "company facts" export, judging
# by the file name and keys -- confirm against the data source.)
cik = data.get('cik')
entity_name = data.get('entityName')
# `or {}`: a missing or null 'facts' entry yields zero rows instead of an
# AttributeError on None.
facts = data.get('facts') or {}

rows = []

for taxonomy, fact_items in facts.items():
    for fact_id, fact_data in fact_items.items():
        label = fact_data.get('label')
        description = fact_data.get('description')
        # A fact without a 'units' entry simply contributes no rows.
        units = fact_data.get('units') or {}

        for unit, details_list in units.items():
            for details in details_list:
                # Entity/fact metadata first, then the per-value detail
                # fields; on a key clash the detail value wins, exactly as
                # with dict.update().
                rows.append({
                    'entity_cik': cik,
                    'entity_name': entity_name,
                    'fact_id': fact_id,
                    'fact_taxonomy': taxonomy,
                    'fact_label': label,
                    'fact_description': description,
                    'fact_unit': unit,
                    **details,
                })

# Create DataFrame from collected rows
df = pd.DataFrame(rows)
|
|
|
|
# Explicit SQL column types for to_sql(), so the table schema does not depend
# on pandas' type inference. Keys are DataFrame column names.
dtype_map = {
    'entity_cik': String(10),
    'entity_name': String(255),
    'fact_id': String(255),
    'fact_taxonomy': String(255),
    'fact_label': String(255),
    # Descriptions are free-form prose and can be longer than 255 characters;
    # a VARCHAR(255) column makes a strict-mode server reject such rows.
    'fact_description': Text,
    'fact_unit': String(50),
    'start': String(10),
    'end': String(10),
    # A bare Float becomes a single-precision FLOAT on MySQL/MariaDB (about
    # 7 significant digits), which silently rounds large values. A precision
    # of 53 makes the server store the column as a DOUBLE.
    'val': Float(53),
    'accn': String(50),
    'fy': Integer,
    'fp': String(10),
    'form': String(10),
    'filed': String(10),
    'frame': String(50),
}
|
|
|
|
# Write the DataFrame to the 'data' table in the MariaDB database.
# NOTE: if_exists='replace' drops an existing 'data' table and recreates it
# on every run; it does not merely create the table when it is missing.
if df.empty:
    # With nothing to write, 'replace' would still drop the existing table,
    # so leave the database untouched and say so.
    print("No rows to write; the database was left unchanged.")
else:
    df.to_sql('data', con=engine, if_exists='replace', index=False, dtype=dtype_map)
    print("Data successfully written to the database.")