implemented regular garbage collection in ingester
This commit is contained in:
parent
5fe96c3b86
commit
b4117d2ee7
@ -17,9 +17,9 @@ def healpix():
|
||||
ra_list = np.array([source.ra for source in sources])
|
||||
dec_list = np.array([source.dec for source in sources])
|
||||
|
||||
skycoord = SkyCoord(ra=ra_list, dec=dec_list, unit='deg', frame="fk5")
|
||||
skycoord = SkyCoord(ra=ra_list, dec=dec_list, unit='deg', frame='fk5')
|
||||
|
||||
healpix = ah.HEALPix(nside=NSIDE, order=ORDER)
|
||||
healpix = ah.HEALPix(nside=NSIDE, order=ORDER, frame='fk5')
|
||||
healpix_indices = healpix.skycoord_to_healpix(skycoord)
|
||||
|
||||
for source, healpix_index in zip(sources, healpix_indices):
|
||||
|
@ -1,4 +1,6 @@
|
||||
import os
|
||||
import gc
|
||||
import time
|
||||
import sys
|
||||
import json
|
||||
import glob
|
||||
@ -12,9 +14,11 @@ from django.core.management.base import BaseCommand
|
||||
from GaiaDBInterface.models import GaiaSource, CatalogFile
|
||||
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = 'Ingest CSV files into the database'
|
||||
|
||||
|
||||
def handle(self, *args, **options):
|
||||
|
||||
if os.path.exists('config.json'):
|
||||
@ -86,13 +90,17 @@ class Command(BaseCommand):
|
||||
def count_ingested_files():
|
||||
return CatalogFile.objects.filter(status='INGESTED').count()
|
||||
|
||||
|
||||
|
||||
|
||||
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
|
||||
self.stdout.write(f"[{current_time}] Starting the data ingestion.")
|
||||
|
||||
# Coroutine that iterates over all catalog files and ingests the sources from each of them.
|
||||
async def ingest_files():
|
||||
|
||||
# Initialise the periodic garbage-collection trigger (interval in seconds and time of the last collection).
|
||||
gc_interval = 10 * 60 # 10 min
|
||||
last_gc_time = time.time()
|
||||
|
||||
#catalog_files = CatalogFile.objects.all()
|
||||
|
||||
@ -148,6 +156,13 @@ class Command(BaseCommand):
|
||||
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S") #Timestamp and progress self.stdout.write statement
|
||||
self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
|
||||
|
||||
# Periodic garbage collection: call gc.collect() once at least gc_interval seconds have elapsed since the last collection.
|
||||
current_time = time.time()
|
||||
if current_time - last_gc_time >= gc_interval:
|
||||
gc.collect()
|
||||
last_gc_time = current_time
|
||||
# current_time holds the raw time.time() float at this point (it is reused for
# the interval check), so format a separate wall-clock stamp for the log line,
# using the same HH:MM:SS / +3h convention as the other progress messages.
gc_timestamp = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
self.stdout.write(f"[{gc_timestamp}] Garbage collection triggered.")
|
||||
|
||||
|
||||
|
||||
asyncio.run(ingest_files())
|
Loading…
x
Reference in New Issue
Block a user