feat: seem to be able to write to a db.
This commit is contained in:
@@ -39,3 +39,6 @@ CREATE TABLE IF NOT EXISTS data (
|
|||||||
fact_description TEXT,
|
fact_description TEXT,
|
||||||
fact_unit VARCHAR(255)
|
fact_unit VARCHAR(255)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
-- @block
|
||||||
|
CREATE TABLE IF NOT EXISTS data ();
|
||||||
58
new_write.py
58
new_write.py
@@ -1,26 +1,40 @@
|
|||||||
import json
|
import json
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from sqlalchemy import create_engine
|
||||||
|
from sqlalchemy.types import Integer, String, Float
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
|
||||||
# Step 1: Load the JSON data
|
# Load environment variables from .env file
|
||||||
with open('CIK0000320193.json') as f:
|
load_dotenv()
|
||||||
|
|
||||||
|
# Database connection
|
||||||
|
DB_USER = os.getenv('DB_USER')
|
||||||
|
DB_PASSWORD = os.getenv('DB_PASSWORD')
|
||||||
|
DB_HOST = os.getenv('DB_HOST')
|
||||||
|
DB_PORT = os.getenv('DB_PORT')
|
||||||
|
DB_NAME = os.getenv('DB_NAME')
|
||||||
|
|
||||||
|
# Create the connection string
|
||||||
|
db_connection_str = f'mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
|
||||||
|
engine = create_engine(db_connection_str)
|
||||||
|
|
||||||
|
# Load the JSON data from the company facts file
|
||||||
|
with open('CIK0001937441.json') as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
|
||||||
# Step 2: Extract the relevant fields
|
|
||||||
cik = data.get('cik')
|
cik = data.get('cik')
|
||||||
entity_name = data.get('entityName')
|
entity_name = data.get('entityName')
|
||||||
facts = data.get('facts')
|
facts = data.get('facts')
|
||||||
|
|
||||||
# Prepare a list to hold the rows
|
|
||||||
rows = []
|
rows = []
|
||||||
|
|
||||||
# Traverse through the facts dictionary
|
|
||||||
for taxonomy, fact_items in facts.items():
|
for taxonomy, fact_items in facts.items():
|
||||||
for fact_id, fact_data in fact_items.items():
|
for fact_id, fact_data in fact_items.items():
|
||||||
label = fact_data.get('label')
|
label = fact_data.get('label')
|
||||||
description = fact_data.get('description')
|
description = fact_data.get('description')
|
||||||
units = fact_data.get('units')
|
units = fact_data.get('units')
|
||||||
|
|
||||||
# For each unit and its details, add a new row
|
|
||||||
for unit, details_list in units.items():
|
for unit, details_list in units.items():
|
||||||
for details in details_list:
|
for details in details_list:
|
||||||
row = {
|
row = {
|
||||||
@@ -32,15 +46,33 @@ for taxonomy, fact_items in facts.items():
|
|||||||
'fact_description': description,
|
'fact_description': description,
|
||||||
'fact_unit': unit
|
'fact_unit': unit
|
||||||
}
|
}
|
||||||
|
|
||||||
# Include the additional details in the row
|
|
||||||
row.update(details)
|
row.update(details)
|
||||||
|
|
||||||
# Append the row to the rows list
|
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
|
|
||||||
# Step 3: Create the DataFrame
|
# Create DataFrame from collected rows
|
||||||
df = pd.DataFrame(rows)
|
df = pd.DataFrame(rows)
|
||||||
|
|
||||||
# Step 4: Output the head of the DataFrame
|
# Map DataFrame columns to explicit SQL column types (optional; without this mapping pandas infers the types)
|
||||||
print(df.head())
|
dtype_map = {
|
||||||
|
'entity_cik': String(10),
|
||||||
|
'entity_name': String(255),
|
||||||
|
'fact_id': String(255),
|
||||||
|
'fact_taxonomy': String(255),
|
||||||
|
'fact_label': String(255),
|
||||||
|
'fact_description': String(255),
|
||||||
|
'fact_unit': String(50),
|
||||||
|
'start': String(10),
|
||||||
|
'end': String(10),
|
||||||
|
'val': Float,
|
||||||
|
'accn': String(50),
|
||||||
|
'fy': Integer,
|
||||||
|
'fp': String(10),
|
||||||
|
'form': String(10),
|
||||||
|
'filed': String(10),
|
||||||
|
'frame': String(50)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Write DataFrame to the 'data' table in the MariaDB database; if_exists='replace' drops and recreates the table if it already exists
|
||||||
|
df.to_sql('data', con=engine, if_exists='replace', index=False, dtype=dtype_map)
|
||||||
|
|
||||||
|
print("Data successfully written to the database.")
|
||||||
Reference in New Issue
Block a user