feat: now writes all datasets to db
This commit is contained in:
@@ -1,15 +1,34 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
from sqlalchemy import create_engine
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Define a list of file paths for easy modification
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
# Get DB connection parameters from environment
|
||||
DB_USER = os.getenv('DB_USER')
|
||||
DB_PASSWORD = os.getenv('DB_PASSWORD')
|
||||
DB_HOST = os.getenv('DB_HOST')
|
||||
DB_PORT = os.getenv('DB_PORT')
|
||||
DB_NAME = os.getenv('DB_NAME')
|
||||
|
||||
# Create a connection string
|
||||
connection_string = f"mariadb+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
|
||||
|
||||
# Create the SQLAlchemy engine
|
||||
engine = create_engine(connection_string)
|
||||
|
||||
# Define a list of file paths and corresponding table names
|
||||
file_paths = [
|
||||
'sec_data/2024q1/num.txt',
|
||||
'sec_data/2024q1/pre.txt',
|
||||
'sec_data/2024q1/sub.txt',
|
||||
'sec_data/2024q1/tag.txt'
|
||||
('sec_data/2024q1/num.txt', 'num'),
|
||||
('sec_data/2024q1/pre.txt', 'pre'),
|
||||
('sec_data/2024q1/sub.txt', 'sub'),
|
||||
('sec_data/2024q1/tag.txt', 'tag')
|
||||
]
|
||||
|
||||
# Loop through each file and perform analysis
|
||||
for i, file_path in enumerate(file_paths):
|
||||
# Loop through each file and write the data to the database
|
||||
for i, (file_path, table_name) in enumerate(file_paths):
|
||||
print(f"\nAnalyzing {file_path} (File {i+1}/4)...")
|
||||
|
||||
# Read the data into a Pandas DataFrame
|
||||
@@ -26,4 +45,10 @@ for i, file_path in enumerate(file_paths):
|
||||
# Check if there are any missing values in the DataFrame
|
||||
missing_values = df.isnull().sum()
|
||||
print("\nMissing Values:")
|
||||
print(missing_values)
|
||||
print(missing_values)
|
||||
|
||||
# Write the DataFrame to the corresponding table in the MariaDB database
|
||||
df.to_sql(table_name, con=engine, if_exists='replace', index=False)
|
||||
print(f"\nData from {file_path} written to the '{table_name}' table in the database.")
|
||||
|
||||
print("\nAll files have been processed and written to the database.")
|
||||
Reference in New Issue
Block a user