# uplim/management/commands/load_survey.py
"""Management command that loads a survey's counts and exposure FITS maps
into the Pixel table, resuming from the last HEALPix id already stored."""

import sys
from datetime import datetime
from itertools import islice

import numpy as np
from astropy.io import fits
from tqdm import tqdm

from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.db.models import Max

from uplim.models import Pixel

# DEFAULT BATCH SIZE (the --batch_size option overrides this per run)
# **************************************************************
BATCH_SIZE = 1000000


def batch(iterable, size):
    """
    Generator that yields successive chunks of size 'size' from 'iterable'.

    The final chunk may be shorter than 'size'; iteration stops when the
    underlying iterable is exhausted.
    """
    iterable = iter(iterable)
    while True:
        chunk = list(islice(iterable, size))
        if not chunk:
            break
        yield chunk


class Command(BaseCommand):
    help = "Process FITS files and store the data in the database"

    # COMMAND LINE ARGUMENTS
    # **************************************************************
    def add_arguments(self, parser):
        parser.add_argument(
            "--counts", type=str, required=True, help="Path of the counts file"
        )
        parser.add_argument(
            "--exposure", type=str, required=True, help="Path of the exposure file"
        )
        parser.add_argument(
            "--survey_number",
            type=int,
            required=True,
            help="Integer ID of the survey being read",
        )
        parser.add_argument(
            "--batch_size",
            type=int,
            default=1000,
            help="Integer number of pixels to be inserted into the database at once",
        )

    def handle(self, *args, **options):
        """Read both FITS maps, then bulk-insert one Pixel row per HEALPix
        index, resuming after the highest hpid already present for the survey.

        Raises:
            CommandError: if the counts and exposure maps differ in shape.
        """
        # GET FILENAMES FROM ARGUMENTS
        # **************************************************************
        counts_file = options["counts"]
        exposure_file = options["exposure"]
        survey_number = options["survey_number"]
        # local name must not shadow the module-level BATCH_SIZE constant
        batch_size = options["batch_size"]

        self.stdout.write(f"\nCounts file:\t{counts_file}")
        self.stdout.write(f"Exposure file:\t{exposure_file}")

        # OPEN BOTH FILES, RAVEL EACH
        # **************************************************************
        # Column "T" holds the map values; ravel() flattens to a 1-D array
        # indexed by HEALPix pixel id.
        with fits.open(counts_file) as hdul:
            counts_data = hdul[1].data["T"].ravel()

        with fits.open(exposure_file) as hdul:
            exposure_data = hdul[1].data["T"].ravel()

        # COMPARE DATA SHAPES, ENSURE THEY'RE THE SAME
        # **************************************************************
        self.stdout.write(f"\nCounts Data Shape:\t{counts_data.shape}")
        self.stdout.write(f"Exposure Data Shape:\t{exposure_data.shape}")

        # raise instead of assert: asserts are stripped under `python -O`
        if counts_data.shape != exposure_data.shape:
            raise CommandError("Counts and exposure maps must have the same shape")

        total_pixels = counts_data.shape[0]
        self.stdout.write(f"\nTotal pixels to insert:\t{total_pixels}")

        # FETCH THE LAST PROCESSED HPID AND CONTINUE FROM IT
        # ************************************************************** 
        # NOTE(review): resume logic assumes hpids are inserted sequentially
        # from 0 with no gaps — true for this command's own inserts.
        max_hpid = Pixel.objects.filter(survey=survey_number).aggregate(
            max_hpid=Max("hpid")
        )["max_hpid"]
        # Explicit `is None` check (not `max_hpid or -1`): a legitimate max
        # hpid of 0 is falsy and would otherwise restart the load from
        # scratch and try to duplicate pixel 0.
        start_index = 0 if max_hpid is None else max_hpid + 1
        pixels_to_insert = total_pixels - start_index

        if pixels_to_insert <= 0:
            self.stdout.write("All pixels have already been inserted. Exiting.")
            # BUG FIX: previously fell through into the insert loop instead
            # of exiting as the message claims.
            return

        # Lazily build Pixel instances, skipping already-inserted indices.
        pixel_generator = (
            Pixel(
                hpid=i,
                counts=int(count),
                exposure=float(exposure),
                survey=survey_number,
            )
            for i, (count, exposure) in enumerate(zip(counts_data, exposure_data))
            if i >= start_index
        )

        total_inserted = 0
        pbar = tqdm(
            total=pixels_to_insert,
            unit="pix",
            desc=f"Survey {survey_number}",
        )

        # Process in batches; each batch commits in its own transaction so a
        # crash mid-load loses at most one batch (and resume picks it up).
        for pixel_batch in batch(pixel_generator, batch_size):
            with transaction.atomic():
                Pixel.objects.bulk_create(pixel_batch)
            # BUG FIX: count the actual batch length — the final batch is
            # usually smaller than batch_size, and total_inserted was never
            # incremented at all before (the Done message reported the wrong
            # number, and the progress bar overshot).
            inserted = len(pixel_batch)
            total_inserted += inserted
            pbar.update(inserted)

        pbar.close()
        self.stdout.write(f"Done: Inserted a total of {total_inserted} pixels.")