feat: loop over all the company facts JSON files
This commit is contained in:
118
write_to_db.py
118
write_to_db.py
@@ -1,14 +1,14 @@
|
|||||||
|
import os
|
||||||
import mariadb
|
import mariadb
|
||||||
import json
|
import json
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables from .env file
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
def connect_to_db():
|
def connect_to_db():
|
||||||
try:
|
try:
|
||||||
# Read the needed variables from the environment
|
# Read the connection parameters from the environment
|
||||||
conn = mariadb.connect(
|
conn = mariadb.connect(
|
||||||
user=os.getenv("DB_USER"),
|
user=os.getenv("DB_USER"),
|
||||||
password=os.getenv("DB_PASSWORD"),
|
password=os.getenv("DB_PASSWORD"),
|
||||||
@@ -38,63 +38,81 @@ def insert_data(cursor, cik, fact_id, start, end, val, accn, fy, fp, form, filed
|
|||||||
(cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame)
|
(cik, fact_id, start, end, val, accn, fy, fp, form, filed, frame)
|
||||||
)
|
)
|
||||||
|
|
||||||
def parse_json_and_insert_data(file_path, cursor):
|
def parse_json_and_insert_data(file_path):
    """Load one company-facts JSON file and insert its contents into the database.

    The file is parsed first; only then is a database connection opened. The
    entity, every fact/unit combination and every data point are inserted in a
    single transaction that is committed once for the whole file.

    Args:
        file_path: Path of the JSON file to import.

    Returns:
        True if the file was fully inserted and committed. False if the file
        could not be read or parsed, no connection could be obtained, or any
        insert failed (in which case the transaction is rolled back).
    """
    # Initialised up front so the cleanup below never touches an unbound name
    # when a failure happens before the connection or cursor exists.
    conn = None
    cursor = None

    try:
        # Read the file inside the try block: an unreadable or malformed file
        # must be reported as a failure of this file, not abort the caller.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # Get the connection and cursor
        conn = connect_to_db()
        if conn is None:
            return False
        cursor = conn.cursor()

        # Insert the entity
        cik = data.get('cik')
        entity_name = data.get('entityName')
        insert_entity(cursor, cik, entity_name)

        # Iterate over facts, grouped by taxonomy
        for taxonomy, fact_details in data['facts'].items():
            for fact_id, fact in fact_details.items():
                label = fact.get('label')
                description = fact.get('description')

                for unit, unit_vals in fact.get('units', {}).items():
                    # One fact row per (taxonomy, fact, unit) combination
                    insert_fact(cursor, taxonomy, fact_id, label, description, unit)

                    # Insert each data point; 'start' and 'frame' are optional
                    for entry in unit_vals:
                        insert_data(
                            cursor,
                            cik,
                            fact_id,
                            entry.get('start', None),
                            entry['end'],
                            entry['val'],
                            entry['accn'],
                            entry['fy'],
                            entry['fp'],
                            entry['form'],
                            entry['filed'],
                            entry.get('frame', None),
                        )

        # Commit the transaction for the whole file
        conn.commit()
        return True

    except Exception as e:
        print(f"Error occurred while processing {file_path}: {e}")
        # Nothing to roll back if the failure happened before connecting
        if conn is not None:
            conn.rollback()
        return False

    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
|
||||||
|
|
||||||
|
def process_all_files_in_directory(directory_path):
    """Import every ``.json`` file found directly inside *directory_path*.

    Each file is handed to ``parse_json_and_insert_data``; progress and the
    per-file outcome are printed, followed by a final summary line.

    Args:
        directory_path: Directory to scan (not searched recursively).
    """
    # os.listdir() returns names in arbitrary order and also lists
    # sub-directories: sort so the "file i of N" progress is reproducible
    # between runs, and keep only regular files.
    files = sorted(
        f for f in os.listdir(directory_path)
        if f.endswith('.json') and os.path.isfile(os.path.join(directory_path, f))
    )
    total_files = len(files)
    processed_files = 0

    for idx, file_name in enumerate(files, start=1):
        file_path = os.path.join(directory_path, file_name)
        print(f"Processing file {idx} of {total_files}: {file_name}")

        # A falsy result means the file failed; keep going with the rest
        if parse_json_and_insert_data(file_path):
            processed_files += 1
            print(f"Successfully processed {file_name}")
        else:
            print(f"Failed to process {file_name}")

    print(f"Finished processing {processed_files} out of {total_files} files.")
|
||||||
|
|
||||||
def main():
|
def main():
    """Script entry point: import all JSON files under ./sec_data/companyfacts/."""
    process_all_files_in_directory('./sec_data/companyfacts/')
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
cursor = conn.cursor()
|
|
||||||
|
|
||||||
# Load JSON and insert data
|
|
||||||
json_file_path = 'CIK0000320193.json'
|
|
||||||
parse_json_and_insert_data(json_file_path, cursor)
|
|
||||||
|
|
||||||
# Commit the transaction
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error occurred: {e}")
|
|
||||||
conn.rollback()
|
|
||||||
finally:
|
|
||||||
cursor.close()
|
|
||||||
conn.close()
|
|
||||||
print("Connection closed")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
Reference in New Issue
Block a user