# uplim/management/commands/load_survey.py
# Last modified: 2025-05-08 14:42:39 +03:00
# (151 lines, 4.5 KiB, Python)
# axc_ul/management/commands/load_survey.py
from datetime import datetime
from itertools import islice

import numpy as np
from astropy.io import fits
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.db.models import Max

from axc_ul.models import Pixel, Survey
# DEFINE BATCH SIZE AND BATCH
# **************************************************************
BATCH_SIZE = 1000000


def batch(iterable, size):
    """Yield successive lists of at most ``size`` items drawn from ``iterable``.

    The final chunk may be shorter than ``size``; an exhausted iterable
    yields nothing.
    """
    it = iter(iterable)
    # iter(callable, sentinel) keeps calling the lambda and stops as soon
    # as it returns the sentinel — i.e. the first empty chunk.
    yield from iter(lambda: list(islice(it, size)), [])
class Command(BaseCommand):
    """Load a pair of FITS healpix maps (counts + exposure) into Pixel rows.

    The command is resumable: it looks up the highest hpid already stored
    for the survey and continues from the next index, inserting in
    BATCH_SIZE chunks, each in its own transaction.
    """

    help = "Process FITS files and store the data in the database"

    # COMMAND LINE ARGUMENTS
    # **************************************************************
    def add_arguments(self, parser):
        """Register the required --counts, --exposure and --survey_number options."""
        parser.add_argument(
            '--counts',
            type=str,
            required=True,
            help='Path of the counts file'
        )
        parser.add_argument(
            '--exposure',
            type=str,
            required=True,
            help='Path of the exposure file'
        )
        parser.add_argument(
            '--survey_number',
            type=int,
            required=True,
            help='Integer ID of the survey being read'
        )

    @staticmethod
    def _load_map(path):
        """Open a FITS file and return the flattened map from column "T" of HDU 1."""
        with fits.open(path) as hdul:
            return hdul[1].data["T"].ravel()

    def handle(self, *args, **options):
        """Read both maps, validate them, and bulk-insert pixels for the survey.

        Raises:
            CommandError: if the two maps do not have the same shape.
        """
        # GET FILENAMES FROM ARGUMENTS
        # **************************************************************
        counts_file = options['counts']
        exposure_file = options['exposure']
        survey_number = options['survey_number']
        self.stdout.write(f"\nCounts file:\t{counts_file}")
        self.stdout.write(f"Exposure file:\t{exposure_file}")

        # OPEN BOTH FILES, RAVEL EACH
        # **************************************************************
        counts_data = self._load_map(counts_file)
        exposure_data = self._load_map(exposure_file)

        # COMPARE DATA SHAPES, ENSURE THEY'RE THE SAME
        # **************************************************************
        self.stdout.write(f"\nCounts Data Shape:\t{counts_data.shape}")
        self.stdout.write(f"Exposure Data Shape:\t{exposure_data.shape}")
        total_pixels = counts_data.shape[0]
        self.stdout.write(f"\nTotal pixels to insert:\t{total_pixels}")
        # Raise instead of assert so the check survives `python -O`.
        if counts_data.shape != exposure_data.shape:
            raise CommandError("Counts and exposure maps must have the same shape")

        # CREATE THE SURVEY IF IT DOES NOT EXIST
        # **************************************************************
        with transaction.atomic():
            survey, created = Survey.objects.get_or_create(number=survey_number)
        if created:
            self.stdout.write(f"Created a new survey instance with number: {survey.number}")
        else:
            self.stdout.write(f"Using existing survey instance with the number: {survey.number}")

        # FETCH THE LAST PROCESSED HPID AND CONTINUE FROM IT
        # **************************************************************
        last_hpid = (
            Pixel.objects
            .filter(survey=survey)
            .aggregate(max_hpid=Max('hpid'))['max_hpid']
        )
        # NOTE: `last_hpid or -1` would misbehave when the max hpid is 0
        # (0 is falsy), so test against None explicitly.
        start_index = last_hpid + 1 if last_hpid is not None else 0

        # Slice both arrays once instead of enumerating from zero and
        # discarding every index below start_index on each run.
        pixel_generator = (
            Pixel(
                hpid=i,
                counts=int(count),
                exposure=float(exposure),
                survey=survey
            )
            for i, (count, exposure) in enumerate(
                zip(counts_data[start_index:], exposure_data[start_index:]),
                start=start_index,
            )
        )

        processed = start_index   # rows already present from earlier runs
        inserted = 0              # rows actually inserted by this run
        # Process in batches, one transaction per batch so a failure only
        # rolls back the current chunk and the command stays resumable.
        for pixel_batch in batch(pixel_generator, BATCH_SIZE):
            with transaction.atomic():
                Pixel.objects.bulk_create(pixel_batch)
            inserted += len(pixel_batch)
            processed += len(pixel_batch)
            percentage = processed / total_pixels * 100
            timestamp = datetime.now().strftime("%H:%M:%S")
            self.stdout.write(
                f"[{timestamp}] {percentage:.2f}% inserted"
            )
        self.stdout.write(f"Inserted a total of {inserted} pixels.")