fix: replace null `coreg` values in num.txt with 'nocoreg' before loading

This commit is contained in:
Leonard Excoffier
2024-08-31 17:17:17 -04:00
parent 57b13dbec5
commit 025ba9b913

View File

@@ -21,10 +21,10 @@ engine = create_engine(connection_string)
# (file path, table name) pairs. The loading loop iterates this list in order
# and writes each file's data to the table named by the second element.
# NOTE(review): this view shows the 2024q1 and 2009q3 entries back to back with
# no comma after the 2024q1 'tag' entry. That looks like the removed and added
# sides of a diff hunk rather than a single list -- confirm which set of
# entries the file actually holds before relying on it.
file_paths = [
('sec_data/2024q1/num.txt', 'num'),
('sec_data/2024q1/pre.txt', 'pre'),
('sec_data/2024q1/sub.txt', 'sub'),
('sec_data/2024q1/tag.txt', 'tag')
('sec_data/2009q3/sub.txt', 'sub'),
('sec_data/2009q3/tag.txt', 'tag'),
('sec_data/2009q3/num.txt', 'num'),
('sec_data/2009q3/pre.txt', 'pre')
]
# Loop through each file and write the data to the database
@@ -47,8 +47,14 @@ for i, (file_path, table_name) in enumerate(file_paths):
print("\nMissing Values:")
print(missing_values)
# If the file being processed is 'num.txt', fix the `coreg` column
if table_name == 'num':
df['coreg'] = df['coreg'].fillna('nocoreg')
print("\nUpdated 'coreg' column (NaN values replaced with 'nocoreg'):")
print(df[['coreg']].head(10)) # Display first 10 rows of the 'coreg' column for verification
# Write the DataFrame to the corresponding table in the MariaDB database
df.to_sql(table_name, con=engine, if_exists='replace', index=False)
df.to_sql(table_name, con=engine, if_exists='append', index=False)
print(f"\nData from {file_path} written to the '{table_name}' table in the database.")
print("\nAll files have been processed and written to the database.")