Removed the periodic gc.collect() calls; added `var = None` assignments in an attempt to mitigate memory leaks

This commit is contained in:
Никита Тырин 2024-09-12 09:46:31 +03:00
parent b4117d2ee7
commit a8854fe4a4

View File

@@ -98,12 +98,6 @@ class Command(BaseCommand):
#function that iterates over all catalog files and ingests sources from them
async def ingest_files():
#garbage collection trigger init
gc_interval = 10 * 60 # 10 min
last_gc_time = time.time()
#catalog_files = CatalogFile.objects.all()
catalog_files = await get_all_catalog_files()
for catalog_file in catalog_files:
@@ -141,14 +135,20 @@ class Command(BaseCommand):
data_dict = df_filtered.to_dict(orient='records') #translate the df into a dict
df = None #free up memory
gaia_source_instances = [
GaiaSource(**data, catalog_file=catalog_file) for data in data_dict #create gaiasource instances, set the foreignkey
]
data_dict = None #free up memory
await update_catalog_file_status(catalog_file, 'IN_PROGRESS')
await bulk_create_gaia_sources(gaia_source_instances) #bulk-create instances from the dict
gaia_source_instances = None #free up memory
await update_catalog_file_status(catalog_file,'INGESTED') #update the catalogfile instance status field to 'INGESTED'
ingested_files_count = await count_ingested_files()
@@ -156,12 +156,6 @@ class Command(BaseCommand):
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S") #Timestamp and progress self.stdout.write statement
self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
#garbage collection timer logic
current_time = time.time()
if current_time - last_gc_time >= gc_interval:
gc.collect()
last_gc_time = current_time
self.stdout.write(f"[{current_time}] Garbage collection triggered.")