switched the backend back to numpy, which may fix the memory leak

Никита Тырин 2024-09-12 10:32:33 +03:00
parent a8854fe4a4
commit 0f3d33ac13


@@ -6,6 +6,7 @@ import json
 import glob
 import uuid
 import asyncio
+import tracemalloc
 from datetime import datetime, timedelta
 import pandas as pd
 import django
@@ -21,6 +22,7 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         if os.path.exists('config.json'):
             with open('config.json', 'r') as config_file:
                 config = json.load(config_file)
@@ -124,7 +126,7 @@ class Command(BaseCommand):
                         file_path,
                         #comment='#',
                         header=1000,
-                        engine="pyarrow"
+                        #engine="pyarrow"
                     )
                     gaiasource_fields = [field.name for field in GaiaSource._meta.get_fields()] #get fields from the model
@@ -135,19 +137,19 @@ class Command(BaseCommand):
                     data_dict = df_filtered.to_dict(orient='records') #translate the df into a dict
-                    df = None #free up memory
+                    #df = None #free up memory
                     gaia_source_instances = [
                         GaiaSource(**data, catalog_file=catalog_file) for data in data_dict #create gaiasource instances, set the foreignkey
                     ]
-                    data_dict = None #free up memory
+                    #data_dict = None #free up memory
                     await update_catalog_file_status(catalog_file, 'IN_PROGRESS')
                     await bulk_create_gaia_sources(gaia_source_instances) #bulk-create instances from the dict
-                    gaia_source_instances = None #free up memory
+                    #gaia_source_instances = None #free up memory
                     await update_catalog_file_status(catalog_file,'INGESTED') #update the catalogfile instance status field to 'INGESTED'
@@ -157,6 +159,4 @@ class Command(BaseCommand):
                     self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
        asyncio.run(ingest_files())
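
For context on the two changes above, here is a minimal sketch that is not part of this commit: it assumes a placeholder file name and shows pd.read_csv falling back to the default numpy-backed C parser once engine="pyarrow" is commented out, plus one plausible way the newly imported tracemalloc module could be used to check whether the per-file memory growth actually goes away.

# Minimal sketch, not from this repository; the file path is a placeholder.
import tracemalloc

import pandas as pd

tracemalloc.start()
before = tracemalloc.take_snapshot()

# With engine="pyarrow" commented out, pandas uses its default C parser and
# numpy-backed dtypes, the backend this commit switches back to.
df = pd.read_csv(
    "GaiaSource_sample.csv",  # hypothetical file; the real paths come from glob
    # engine="pyarrow",       # the pyarrow engine suspected of leaking memory
)

after = tracemalloc.take_snapshot()
for stat in after.compare_to(before, "lineno")[:5]:
    print(stat)  # largest allocation deltas, attributed to source lines

Comparing snapshots taken before and after a few catalog files are ingested is one way to confirm whether the growth disappears with the default engine, which is presumably why tracemalloc is imported here even though its call sites are not visible in this hunk.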