# uplim/management/commands/load_survey.py
#
# Load a pair of HEALPix FITS maps (counts + exposure) into the Pixel
# table for one survey.  The command is resumable: it finds the highest
# hpid already stored for the survey and continues from the next one,
# so it can be safely re-run after an interruption.

import numpy as np
from astropy.io import fits
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from uplim.models import Pixel, Survey
from django.db.models import Max
from itertools import islice
from datetime import datetime


def batch(iterable, size):
    """Yield successive lists of at most ``size`` items from ``iterable``.

    The final chunk may be shorter than ``size``; iteration stops when
    the underlying iterator is exhausted.
    """
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, size))
        if not chunk:
            break
        yield chunk


class Command(BaseCommand):
    """Ingest counts/exposure FITS maps into the Pixel table."""

    help = "Process FITS files and store the data in the database"

    # COMMAND LINE ARGUMENTS
    # **************************************************************
    def add_arguments(self, parser):
        parser.add_argument(
            '--counts',
            type=str,
            required=True,
            help='Path of the counts file'
        )
        parser.add_argument(
            '--exposure',
            type=str,
            required=True,
            help='Path of the exposure file'
        )
        parser.add_argument(
            '--survey_number',
            type=int,
            required=True,
            help='Integer ID of the survey being read'
        )
        parser.add_argument(
            '--batch_size',
            type=int,
            required=True,
            help='Integer number of pixels to be inserted into the database at once'
        )

    @staticmethod
    def _read_flat_map(path):
        """Return the flattened 'T' column of FITS extension 1 of *path*.

        The column is copied out of the HDU before the file is closed:
        astropy memory-maps FITS data by default, and a view of a
        memmapped array may not stay valid once the file handle is gone.
        """
        with fits.open(path) as hdul:
            return np.array(hdul[1].data["T"]).ravel()

    def handle(self, *args, **options):
        # GET FILENAMES FROM ARGUMENTS
        # **************************************************************
        counts_file = options['counts']
        exposure_file = options['exposure']
        survey_number = options['survey_number']
        batch_size = options['batch_size']

        self.stdout.write(f"\nCounts file:\t{counts_file}")
        self.stdout.write(f"Exposure file:\t{exposure_file}")

        # OPEN BOTH FILES, RAVEL EACH
        # **************************************************************
        counts_data = self._read_flat_map(counts_file)
        exposure_data = self._read_flat_map(exposure_file)

        # COMPARE DATA SHAPES, ENSURE THEY'RE THE SAME
        # **************************************************************
        self.stdout.write(f"\nCounts Data Shape:\t{counts_data.shape}")
        self.stdout.write(f"Exposure Data Shape:\t{exposure_data.shape}")

        # CommandError instead of assert: asserts are stripped under -O,
        # and a shape mismatch is a user input error, not a programmer bug.
        if counts_data.shape != exposure_data.shape:
            raise CommandError(
                "Counts and exposure maps must have the same shape"
            )

        total_pixels = counts_data.shape[0]
        self.stdout.write(f"\nTotal pixels to insert:\t{total_pixels}")

        # CREATE THE SURVEY IF IT DOES NOT EXIST
        # **************************************************************
        with transaction.atomic():
            survey, created = Survey.objects.get_or_create(number=survey_number)

        if created:
            self.stdout.write(
                f"Created a new survey instance with number: {survey.number}"
            )
        else:
            self.stdout.write(
                f"Using existing survey instance with the number: {survey.number}"
            )

        # FETCH THE LAST PROCESSED HPID AND CONTINUE FROM IT
        # **************************************************************
        # NOTE: an explicit `is None` check is required here.  The old
        # `... or -1` idiom mapped a legitimate max hpid of 0 (falsy) to
        # -1, which would re-insert pixel 0 on resume.
        last_hpid = (
            Pixel.objects
            .filter(survey=survey)
            .aggregate(max_hpid=Max('hpid'))['max_hpid']
        )
        start_index = 0 if last_hpid is None else last_hpid + 1

        # Skip already-inserted pixels with islice instead of filtering
        # every index: no wasted pass over rows inserted by earlier runs.
        pixel_generator = (
            Pixel(
                hpid=i,
                counts=int(count),
                exposure=float(exposure),
                survey=survey
            )
            for i, (count, exposure) in enumerate(
                islice(zip(counts_data, exposure_data), start_index, None),
                start=start_index,
            )
        )

        inserted_this_run = 0
        total_inserted = start_index  # rows already present from prior runs

        # Process in batches; each batch commits atomically so progress
        # survives an interruption between batches.
        for pixel_batch in batch(pixel_generator, batch_size):
            with transaction.atomic():
                Pixel.objects.bulk_create(pixel_batch)

            inserted_this_run += len(pixel_batch)
            total_inserted += len(pixel_batch)

            percentage = total_inserted / total_pixels * 100
            timestamp = datetime.now().strftime("%H:%M:%S")
            self.stdout.write(
                f"[{timestamp}] {percentage:.2f}% inserted"
            )

        # Report this run's work separately from the cumulative count so
        # a resumed run does not claim credit for earlier insertions.
        self.stdout.write(
            f"Inserted {inserted_this_run} pixels this run "
            f"({total_inserted} total for survey {survey.number})."
        )