implemented regular garbage collection in ingester

This commit is contained in:
Никита Тырин 2024-09-11 15:45:46 +03:00
parent 5fe96c3b86
commit b4117d2ee7
2 changed files with 18 additions and 3 deletions

View File

@ -17,9 +17,9 @@ def healpix():
ra_list = np.array([source.ra for source in sources])
dec_list = np.array([source.dec for source in sources])
skycoord = SkyCoord(ra=ra_list, dec=dec_list, unit='deg', frame="fk5")
skycoord = SkyCoord(ra=ra_list, dec=dec_list, unit='deg', frame='fk5')
healpix = ah.HEALPix(nside=NSIDE, order=ORDER)
healpix = ah.HEALPix(nside=NSIDE, order=ORDER, frame='fk5')
healpix_indices = healpix.skycoord_to_healpix(skycoord)
for source, healpix_index in zip(sources, healpix_indices):

View File

@ -1,4 +1,6 @@
import os
import gc
import time
import sys
import json
import glob
@ -12,9 +14,11 @@ from django.core.management.base import BaseCommand
from GaiaDBInterface.models import GaiaSource, CatalogFile
class Command(BaseCommand):
help = 'Ingest CSV files into the database'
def handle(self, *args, **options):
if os.path.exists('config.json'):
@ -86,13 +90,17 @@ class Command(BaseCommand):
def count_ingested_files():
    """Return how many CatalogFile rows currently have status 'INGESTED'."""
    ingested_files = CatalogFile.objects.filter(status='INGESTED')
    return ingested_files.count()
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
self.stdout.write(f"[{current_time}] Starting the data ingestion.")
#function that iterates over all catalog files and ingests sources from them
async def ingest_files():
#garbage collection trigger init
gc_interval = 10 * 60 # 10 min
last_gc_time = time.time()
#catalog_files = CatalogFile.objects.all()
@ -148,6 +156,13 @@ class Command(BaseCommand):
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S") #Timestamp and progress self.stdout.write statement
self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
#garbage collection timer logic
# Keep the epoch reading in its own variable so the formatted
# `current_time` string used for log prefixes is not overwritten by a float.
now = time.time()
if now - last_gc_time >= gc_interval:
    gc.collect()
    last_gc_time = now
    # Format the log prefix the same way as the other progress messages
    # (HH:MM:SS with the +3h offset) instead of printing the raw epoch float.
    gc_timestamp = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
    self.stdout.write(f"[{gc_timestamp}] Garbage collection triggered.")
asyncio.run(ingest_files())