diff --git a/management/commands/ingester.py b/management/commands/ingester.py index 3b29f68..0253cee 100644 --- a/management/commands/ingester.py +++ b/management/commands/ingester.py @@ -6,6 +6,7 @@ import json import glob import uuid import asyncio +import tracemalloc from datetime import datetime, timedelta import pandas as pd import django @@ -17,10 +18,11 @@ from GaiaDBInterface.models import GaiaSource, CatalogFile class Command(BaseCommand): help = 'Ingest CSV files into the database' - + def handle(self, *args, **options): + if os.path.exists('config.json'): with open('config.json', 'r') as config_file: config = json.load(config_file) @@ -124,7 +126,7 @@ class Command(BaseCommand): file_path, #comment='#', header=1000, - engine="pyarrow" + #engine="pyarrow" ) gaiasource_fields = [field.name for field in GaiaSource._meta.get_fields()] #get fields from the model @@ -135,19 +137,19 @@ class Command(BaseCommand): data_dict = df_filtered.to_dict(orient='records') #translate the df into a dict - df = None #free up memory + #df = None #free up memory gaia_source_instances = [ GaiaSource(**data, catalog_file=catalog_file) for data in data_dict #create gaiasource instances, set the foreignkey ] - data_dict = None #free up memory + #data_dict = None #free up memory await update_catalog_file_status(catalog_file, 'IN_PROGRESS') await bulk_create_gaia_sources(gaia_source_instances) #bulk-create instances from the dict - gaia_source_instances = None #free up memory + #gaia_source_instances = None #free up memory await update_catalog_file_status(catalog_file,'INGESTED') #update the catalogfile instance status field to 'INGESTED' @@ -157,6 +159,4 @@ class Command(BaseCommand): self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}") - - - asyncio.run(ingest_files()) \ No newline at end of file + asyncio.run(ingest_files())