feat: script cycle works
This commit is contained in:
110
new_write.py
110
new_write.py
@@ -1,9 +1,9 @@
|
|||||||
import json
|
import json
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sqlalchemy import create_engine
|
from sqlalchemy import create_engine, Integer, String, Float, Date, Text
|
||||||
from sqlalchemy.types import Integer, String, Float
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
|
import glob
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables from .env file
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -19,24 +19,66 @@ DB_NAME = os.getenv('DB_NAME')
|
|||||||
db_connection_str = f'mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
|
db_connection_str = f'mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'
|
||||||
engine = create_engine(db_connection_str)
|
engine = create_engine(db_connection_str)
|
||||||
|
|
||||||
# Load the JSON data as before
|
# List all JSON files in the target directory
|
||||||
with open('CIK0001937441.json') as f:
|
json_files = glob.glob('./sec_data/companyfacts/*.json')
|
||||||
|
|
||||||
|
# Initialize a counter
|
||||||
|
file_count = len(json_files)
|
||||||
|
current_file = 1
|
||||||
|
|
||||||
|
# Data type mapping for the DataFrame to SQL conversion
|
||||||
|
dtype_map = {
|
||||||
|
'entity_cik': Integer,
|
||||||
|
'entity_name': String(255),
|
||||||
|
'fact_id': String(255),
|
||||||
|
'fact_taxonomy': String(255),
|
||||||
|
'fact_label': String(255),
|
||||||
|
'fact_description': Text,
|
||||||
|
'fact_unit': String(255),
|
||||||
|
'start': Date,
|
||||||
|
'end': Date,
|
||||||
|
'val': Float,
|
||||||
|
'accn': String(50),
|
||||||
|
'fy': Integer,
|
||||||
|
'fp': String(255),
|
||||||
|
'form': String(255),
|
||||||
|
'filed': Date,
|
||||||
|
'frame': String(255)
|
||||||
|
}
|
||||||
|
|
||||||
|
for json_file in json_files:
|
||||||
|
try:
|
||||||
|
# Load the JSON data
|
||||||
|
with open(json_file) as f:
|
||||||
data = json.load(f)
|
data = json.load(f)
|
||||||
|
|
||||||
cik = data.get('cik')
|
# Informing the user about the current file being processed
|
||||||
entity_name = data.get('entityName')
|
print(f"Processing file {current_file}/{file_count}: {json_file}")
|
||||||
facts = data.get('facts')
|
current_file += 1
|
||||||
|
|
||||||
rows = []
|
# Check if the JSON has the keys we're interested in
|
||||||
|
cik = data.get('cik', None)
|
||||||
|
entity_name = data.get('entityName', None)
|
||||||
|
facts = data.get('facts', {})
|
||||||
|
|
||||||
for taxonomy, fact_items in facts.items():
|
# Initialize a list to hold rows
|
||||||
|
rows = []
|
||||||
|
|
||||||
|
# Skip files that don't have facts to process
|
||||||
|
if not facts:
|
||||||
|
print(f"File {json_file} has no facts to process. Skipping...")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Process the facts dynamically
|
||||||
|
for taxonomy, fact_items in facts.items():
|
||||||
for fact_id, fact_data in fact_items.items():
|
for fact_id, fact_data in fact_items.items():
|
||||||
label = fact_data.get('label')
|
label = fact_data.get('label', None)
|
||||||
description = fact_data.get('description')
|
description = fact_data.get('description', None)
|
||||||
units = fact_data.get('units')
|
units = fact_data.get('units', {})
|
||||||
|
|
||||||
for unit, details_list in units.items():
|
for unit, details_list in units.items():
|
||||||
for details in details_list:
|
for details in details_list:
|
||||||
|
# Generate row dictionary dynamically, only updating non-None values
|
||||||
row = {
|
row = {
|
||||||
'entity_cik': cik,
|
'entity_cik': cik,
|
||||||
'entity_name': entity_name,
|
'entity_name': entity_name,
|
||||||
@@ -46,33 +88,27 @@ for taxonomy, fact_items in facts.items():
|
|||||||
'fact_description': description,
|
'fact_description': description,
|
||||||
'fact_unit': unit
|
'fact_unit': unit
|
||||||
}
|
}
|
||||||
row.update(details)
|
|
||||||
|
# Add the remaining keys in details to row
|
||||||
|
for key, value in details.items():
|
||||||
|
row[key] = value
|
||||||
|
|
||||||
|
# Append the row to rows list
|
||||||
rows.append(row)
|
rows.append(row)
|
||||||
|
|
||||||
# Create DataFrame from collected rows
|
# Create DataFrame only if there are rows
|
||||||
df = pd.DataFrame(rows)
|
if rows:
|
||||||
|
df = pd.DataFrame(rows)
|
||||||
|
|
||||||
# Define DataFrame to SQL types (optional, but recommended for performance)
|
# Write DataFrame to the 'data' table, appending if table exists
|
||||||
dtype_map = {
|
df.to_sql('data', con=engine, if_exists='append', index=False, dtype=dtype_map)
|
||||||
'entity_cik': String(10),
|
else:
|
||||||
'entity_name': String(255),
|
print(f"No data rows were created for file {json_file}. Skipping insert...")
|
||||||
'fact_id': String(255),
|
|
||||||
'fact_taxonomy': String(255),
|
|
||||||
'fact_label': String(255),
|
|
||||||
'fact_description': String(255),
|
|
||||||
'fact_unit': String(50),
|
|
||||||
'start': String(10),
|
|
||||||
'end': String(10),
|
|
||||||
'val': Float,
|
|
||||||
'accn': String(50),
|
|
||||||
'fy': Integer,
|
|
||||||
'fp': String(10),
|
|
||||||
'form': String(10),
|
|
||||||
'filed': String(10),
|
|
||||||
'frame': String(50)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Write DataFrame to the 'data' table in the MariaDB database, create the table if it doesn't exist
|
except json.JSONDecodeError as e:
|
||||||
df.to_sql('data', con=engine, if_exists='replace', index=False, dtype=dtype_map)
|
print(f"Failed to process file {json_file}: Invalid JSON format. Error: {e}")
|
||||||
|
|
||||||
print("Data successfully written to the database.")
|
except Exception as e:
|
||||||
|
print(f"An error occurred while processing file {json_file}: {e}")
|
||||||
|
|
||||||
|
print("Processing complete. All files have been handled.")
|
||||||
Reference in New Issue
Block a user