NOTE(review): this patch was recovered from a copy in which line breaks and
indentation had been collapsed. Blank context lines and leading whitespace
below are reconstructed and must be verified against the working tree
(`git apply --check`) or the patch regenerated with `git diff`. The progress
f-string in the last hunk was split across a line break in the recovered copy
and is rejoined here -- confirm it against the source file.
The `index` lines still carry the blob ids of the original patch; the
post-image id of ingester.py no longer matches because the GC timer lines
were corrected (separate monotonic timestamp, log stamp left intact).

diff --git a/management/commands/indexer.py b/management/commands/indexer.py
index 3e66cbd..c318cc9 100644
--- a/management/commands/indexer.py
+++ b/management/commands/indexer.py
@@ -17,9 +17,9 @@ def healpix():
     ra_list = np.array([source.ra for source in sources])
     dec_list = np.array([source.dec for source in sources])
 
-    skycoord = SkyCoord(ra=ra_list, dec=dec_list, unit='deg', frame="fk5")
+    skycoord = SkyCoord(ra=ra_list, dec=dec_list, unit='deg', frame='fk5')
 
-    healpix = ah.HEALPix(nside=NSIDE, order=ORDER)
+    healpix = ah.HEALPix(nside=NSIDE, order=ORDER, frame='fk5')
     healpix_indices = healpix.skycoord_to_healpix(skycoord)
 
     for source, healpix_index in zip(sources, healpix_indices):
diff --git a/management/commands/ingester.py b/management/commands/ingester.py
index 713151d..70b632f 100644
--- a/management/commands/ingester.py
+++ b/management/commands/ingester.py
@@ -1,4 +1,6 @@
 import os
+import gc
+import time
 import sys
 import json
 import glob
@@ -12,9 +14,11 @@ from django.core.management.base import BaseCommand
 from GaiaDBInterface.models import GaiaSource, CatalogFile
 
 
+
 class Command(BaseCommand):
     help = 'Ingest CSV files into the database'
 
+
     def handle(self, *args, **options):
 
         if os.path.exists('config.json'):
@@ -86,13 +90,18 @@ class Command(BaseCommand):
 
         def count_ingested_files():
             return CatalogFile.objects.filter(status='INGESTED').count()
-        
+
         current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
         self.stdout.write(f"[{current_time}] Starting the data ingestion.")
 
 
         #function that iterates over all catalog files and ingests sources from them
         async def ingest_files():
+
+            #garbage collection trigger init
+            # monotonic clock: the interval check must not be skewed by wall-clock adjustments
+            gc_interval = 10 * 60 # 10 min
+            last_gc_time = time.monotonic()
 
             #catalog_files = CatalogFile.objects.all()
 
@@ -148,6 +157,14 @@ class Command(BaseCommand):
                 current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
                 #Timestamp and progress self.stdout.write statement
                 self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
+                #garbage collection timer logic
+                # use a separate name so current_time keeps the HH:MM:SS stamp used in log lines
+                now = time.monotonic()
+                if now - last_gc_time >= gc_interval:
+                    gc.collect()
+                    last_gc_time = now
+                    self.stdout.write(f"[{current_time}] Garbage collection triggered.")
+
 
 
         asyncio.run(ingest_files())
\ No newline at end of file