Load the SEC 2009q3 files (sub, tag, num, pre) instead of the 2024q1 files.

When the table being written is 'num', NaN values in the `coreg` column are
replaced with 'nocoreg' before the write, and DataFrame.to_sql is called with
if_exists='append' rather than if_exists='replace'.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The line structure matches the hunk headers
(10/10 and 8/14 lines), but the leading whitespace inside the hunks is
reconstructed (4-space indents) and should be confirmed against
write_to_db.py before applying.

diff --git a/write_to_db.py b/write_to_db.py
index 27d779b..5e83df1 100644
--- a/write_to_db.py
+++ b/write_to_db.py
@@ -21,10 +21,10 @@ engine = create_engine(connection_string)
 
 # Define a list of file paths and corresponding table names
 file_paths = [
-    ('sec_data/2024q1/num.txt', 'num'),
-    ('sec_data/2024q1/pre.txt', 'pre'),
-    ('sec_data/2024q1/sub.txt', 'sub'),
-    ('sec_data/2024q1/tag.txt', 'tag')
+    ('sec_data/2009q3/sub.txt', 'sub'),
+    ('sec_data/2009q3/tag.txt', 'tag'),
+    ('sec_data/2009q3/num.txt', 'num'),
+    ('sec_data/2009q3/pre.txt', 'pre')
 ]
 
 # Loop through each file and write the data to the database
@@ -47,8 +47,14 @@ for i, (file_path, table_name) in enumerate(file_paths):
     print("\nMissing Values:")
     print(missing_values)
 
+    # If the file being processed is 'num.txt', fix the `coreg` column
+    if table_name == 'num':
+        df['coreg'] = df['coreg'].fillna('nocoreg')
+        print("\nUpdated 'coreg' column (NaN values replaced with 'nocoreg'):")
+        print(df[['coreg']].head(10))  # Display first 10 rows of the 'coreg' column for verification
+
     # Write the DataFrame to the corresponding table in the MariaDB database
-    df.to_sql(table_name, con=engine, if_exists='replace', index=False)
+    df.to_sql(table_name, con=engine, if_exists='append', index=False)
     print(f"\nData from {file_path} written to the '{table_name}' table in the database.")
 
 print("\nAll files have been processed and written to the database.")
\ No newline at end of file