Implemented periodic garbage collection in the ingester
This commit is contained in:
parent
5fe96c3b86
commit
b4117d2ee7
@ -17,9 +17,9 @@ def healpix():
|
|||||||
ra_list = np.array([source.ra for source in sources])
|
ra_list = np.array([source.ra for source in sources])
|
||||||
dec_list = np.array([source.dec for source in sources])
|
dec_list = np.array([source.dec for source in sources])
|
||||||
|
|
||||||
skycoord = SkyCoord(ra=ra_list, dec=dec_list, unit='deg', frame="fk5")
|
skycoord = SkyCoord(ra=ra_list, dec=dec_list, unit='deg', frame='fk5')
|
||||||
|
|
||||||
healpix = ah.HEALPix(nside=NSIDE, order=ORDER)
|
healpix = ah.HEALPix(nside=NSIDE, order=ORDER, frame='fk5')
|
||||||
healpix_indices = healpix.skycoord_to_healpix(skycoord)
|
healpix_indices = healpix.skycoord_to_healpix(skycoord)
|
||||||
|
|
||||||
for source, healpix_index in zip(sources, healpix_indices):
|
for source, healpix_index in zip(sources, healpix_indices):
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
|
import gc
|
||||||
|
import time
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
import glob
|
import glob
|
||||||
@ -12,9 +14,11 @@ from django.core.management.base import BaseCommand
|
|||||||
from GaiaDBInterface.models import GaiaSource, CatalogFile
|
from GaiaDBInterface.models import GaiaSource, CatalogFile
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(BaseCommand):
|
||||||
help = 'Ingest CSV files into the database'
|
help = 'Ingest CSV files into the database'
|
||||||
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
|
||||||
if os.path.exists('config.json'):
|
if os.path.exists('config.json'):
|
||||||
@ -86,13 +90,17 @@ class Command(BaseCommand):
|
|||||||
def count_ingested_files():
|
def count_ingested_files():
|
||||||
return CatalogFile.objects.filter(status='INGESTED').count()
|
return CatalogFile.objects.filter(status='INGESTED').count()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
|
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
|
||||||
self.stdout.write(f"[{current_time}] Starting the data ingestion.")
|
self.stdout.write(f"[{current_time}] Starting the data ingestion.")
|
||||||
|
|
||||||
#function that iterates over all catalog files and ingests sources from them
|
#function that iterates over all catalog files and ingests sources from them
|
||||||
async def ingest_files():
|
async def ingest_files():
|
||||||
|
|
||||||
|
# Initialize the garbage-collection timer: a collection is forced once gc_interval seconds have elapsed since last_gc_time
|
||||||
|
gc_interval = 10 * 60 # 10 min
|
||||||
|
last_gc_time = time.time()
|
||||||
|
|
||||||
#catalog_files = CatalogFile.objects.all()
|
#catalog_files = CatalogFile.objects.all()
|
||||||
|
|
||||||
@ -148,6 +156,13 @@ class Command(BaseCommand):
|
|||||||
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S") #Timestamp and progress self.stdout.write statement
|
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S") #Timestamp and progress self.stdout.write statement
|
||||||
self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
|
self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
|
||||||
|
|
||||||
|
# Periodic garbage collection: force a full collection once at least
# gc_interval seconds have elapsed since the previous one.
now = time.time()
if now - last_gc_time >= gc_interval:
    gc.collect()
    last_gc_time = now
    # The epoch reading lives in its own variable (`now`) so the log line
    # carries the same formatted "%H:%M:%S" stamp as the other progress
    # messages instead of a raw float such as 1718040000.123.
    current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
    self.stdout.write(f"[{current_time}] Garbage collection triggered.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
asyncio.run(ingest_files())
|
asyncio.run(ingest_files())
|
Loading…
x
Reference in New Issue
Block a user