# uplim/management/commands/load_survey.py
#
# Load a pair of HEALPix FITS maps (counts + exposure) into the Pixel
# table for one survey, inserting in batches and resuming from the last
# hpid already present if a previous run was interrupted.

import numpy as np  # noqa: F401  (kept from original module; not used directly)
from astropy.io import fits
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from uplim.models import Pixel  # , Survey
from django.db.models import Max
from itertools import islice
from datetime import datetime


def batch(iterable, size):
    """
    Generator that yields successive chunks of size 'size' from 'iterable'.

    The final chunk may be shorter than 'size'; nothing is yielded for an
    empty iterable.
    """
    iterable = iter(iterable)
    while True:
        chunk = list(islice(iterable, size))
        if not chunk:
            break
        yield chunk


class Command(BaseCommand):
    help = "Process FITS files and store the data in the database"

    # COMMAND LINE ARGUMENTS
    # **************************************************************
    def add_arguments(self, parser):
        """Register the four command-line options this command accepts."""
        parser.add_argument(
            '--counts',
            type=str,
            required=True,
            help='Path of the counts file'
        )
        parser.add_argument(
            '--exposure',
            type=str,
            required=True,
            help='Path of the exposure file'
        )
        parser.add_argument(
            '--survey_number',
            type=int,
            required=True,
            help='Integer ID of the survey being read'
        )
        parser.add_argument(
            '--batch_size',
            type=int,
            default=1000,
            help='Integer number of pixels to be inserted into the database at once'
        )

    def handle(self, *args, **options):
        """
        Read both FITS maps, verify they match, and bulk-insert one Pixel
        row per HEALPix index, skipping rows already present for this survey.
        """
        # GET FILENAMES FROM ARGUMENTS
        # **************************************************************
        counts_file = options['counts']
        exposure_file = options['exposure']
        survey_number = options['survey_number']
        batch_size = options['batch_size']

        self.stdout.write(f"\nCounts file:\t{counts_file}")
        self.stdout.write(f"Exposure file:\t{exposure_file}")

        # OPEN BOTH FILES, RAVEL EACH
        # **************************************************************
        # Each map stores its values in table column "T" of HDU 1; ravel
        # flattens to a 1-D array indexed by HEALPix pixel id (hpid).
        column_name = "T"
        with fits.open(counts_file) as hdul:
            counts_data = hdul[1].data[column_name].ravel()
        with fits.open(exposure_file) as hdul:
            exposure_data = hdul[1].data[column_name].ravel()

        # COMPARE DATA SHAPES, ENSURE THEY'RE THE SAME
        # **************************************************************
        self.stdout.write(f"\nCounts Data Shape:\t{counts_data.shape}")
        self.stdout.write(f"Exposure Data Shape:\t{exposure_data.shape}")

        total_pixels = counts_data.shape[0]
        self.stdout.write(f"\nTotal pixels to insert:\t{total_pixels}")

        # CommandError instead of assert: asserts are stripped under
        # `python -O`, which would silently disable this safety check.
        if counts_data.shape != exposure_data.shape:
            raise CommandError(
                "Counts and exposure maps must have the same shape"
            )

        # CREATE THE SURVEY IF IT DOES NOT EXIST
        # **************************************************************
        # with transaction.atomic():
        #     survey,created = Survey.objects.get_or_create(number=survey_number)
        #     if created:
        #         self.stdout.write(f"Created a new survey instance with number: {survey.number}")
        #     else:
        #         self.stdout.write(f"Using existing survey instance with the number: {survey.number}")

        # FETCH THE LAST PROCESSED HPID AND CONTINUE FROM IT
        # **************************************************************
        # NOTE: must test `is None` explicitly rather than `or -1` —
        # a legitimate max hpid of 0 is falsy and would have restarted
        # from index 0, re-inserting pixel 0.
        last_hpid = (
            Pixel.objects
            .filter(survey=survey_number)
            .aggregate(max_hpid=Max('hpid'))['max_hpid']
        )
        start_index = 0 if last_hpid is None else last_hpid + 1

        # Slice both arrays once so already-inserted rows are never
        # visited, instead of filtering every index through a generator
        # condition; enumerate(start=...) keeps hpid values correct.
        pixel_generator = (
            Pixel(
                hpid=i,
                counts=int(count),
                exposure=float(exposure),
                survey=survey_number
            )
            for i, (count, exposure) in enumerate(
                zip(counts_data[start_index:], exposure_data[start_index:]),
                start=start_index,
            )
        )

        # total_inserted starts at start_index so the progress percentage
        # accounts for rows inserted by previous (interrupted) runs.
        total_inserted = start_index

        # Process in batches; each batch commits in its own transaction so
        # an interruption loses at most one batch of work.
        for pixel_batch in batch(pixel_generator, batch_size):
            with transaction.atomic():
                Pixel.objects.bulk_create(pixel_batch)
            total_inserted += len(pixel_batch)
            percentage = total_inserted / total_pixels * 100
            timestamp = datetime.now().strftime("%H:%M:%S")
            self.stdout.write(
                f"[{timestamp}] {percentage:.2f}% inserted"
            )

        self.stdout.write(f"Inserted a total of {total_inserted} pixels.")