switched the backend back to numpy in the hope that it fixes the memory leak

Никита Тырин 2024-09-12 10:32:33 +03:00
parent a8854fe4a4
commit 0f3d33ac13


@@ -6,6 +6,7 @@ import json
 import glob
 import uuid
 import asyncio
+import tracemalloc
 from datetime import datetime, timedelta
 import pandas as pd
 import django
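This hunk only adds the tracemalloc import; no tracing calls appear elsewhere in the diff. A minimal sketch of how the module is typically used to hunt a leak like this one, assuming it would wrap one ingestion pass (the surrounding lines are illustrative, not from the commit):

    import tracemalloc

    tracemalloc.start()  # begin tracing Python allocations

    # ... run one ingestion pass here ...

    snapshot = tracemalloc.take_snapshot()
    for stat in snapshot.statistics('lineno')[:10]:
        print(stat)  # the ten largest allocation sites, by total size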
@@ -17,10 +18,11 @@ from GaiaDBInterface.models import GaiaSource, CatalogFile
 class Command(BaseCommand):
     help = 'Ingest CSV files into the database'

     def handle(self, *args, **options):
         if os.path.exists('config.json'):
             with open('config.json', 'r') as config_file:
                 config = json.load(config_file)
@@ -124,7 +126,7 @@ class Command(BaseCommand):
                 file_path,
                 #comment='#',
                 header=1000,
-                engine="pyarrow"
+                #engine="pyarrow"
             )

             gaiasource_fields = [field.name for field in GaiaSource._meta.get_fields()]  # get fields from the model
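For context, pandas selects its default C parser whenever the engine argument is omitted, so commenting out engine="pyarrow" is what "back to numpy" in the commit message refers to. A minimal sketch of the call before and after, with file_path and header=1000 taken from the surrounding code:

    import pandas as pd

    # before this commit: pyarrow parser
    # df = pd.read_csv(file_path, header=1000, engine="pyarrow")

    # after this commit: default C parser
    df = pd.read_csv(file_path, header=1000)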
@@ -135,19 +137,19 @@ class Command(BaseCommand):
             data_dict = df_filtered.to_dict(orient='records')  # translate the df into a dict
-            df = None  # free up memory
+            #df = None  # free up memory

             gaia_source_instances = [
                 GaiaSource(**data, catalog_file=catalog_file) for data in data_dict  # create GaiaSource instances, set the foreign key
             ]
-            data_dict = None  # free up memory
+            #data_dict = None  # free up memory

             await update_catalog_file_status(catalog_file, 'IN_PROGRESS')
             await bulk_create_gaia_sources(gaia_source_instances)  # bulk-create instances from the dict
-            gaia_source_instances = None  # free up memory
+            #gaia_source_instances = None  # free up memory

             await update_catalog_file_status(catalog_file, 'INGESTED')  # update the CatalogFile instance's status field to 'INGESTED'
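update_catalog_file_status and bulk_create_gaia_sources are awaited here but not defined anywhere in this diff. A plausible sketch of their shape, assuming they wrap Django's synchronous ORM with asgiref's sync_to_async; the helper names and arguments come from the calls above, while batch_size and the status field name are assumptions:

    from asgiref.sync import sync_to_async

    @sync_to_async
    def bulk_create_gaia_sources(instances):
        # a single batched INSERT path instead of one query per row
        GaiaSource.objects.bulk_create(instances, batch_size=1000)  # batch_size is illustrative

    @sync_to_async
    def update_catalog_file_status(catalog_file, status):
        catalog_file.status = status  # assumes the model exposes a 'status' field
        catalog_file.save(update_fields=['status'])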
@@ -157,6 +159,4 @@ class Command(BaseCommand):
             self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")

-        asyncio.run(ingest_files())
         asyncio.run(ingest_files())
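To confirm whether dropping the pyarrow engine actually stops the growth, two tracemalloc snapshots taken a few iterations apart can be diffed. A minimal sketch, assuming tracing was started earlier; none of these lines are in the commit:

    snap1 = tracemalloc.take_snapshot()
    # ... ingest a few more files ...
    snap2 = tracemalloc.take_snapshot()
    for stat in snap2.compare_to(snap1, 'lineno')[:10]:
        print(stat)  # allocation sites with the largest growth between snapshots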