# uplim/management/commands/load_survey.py
# Last modified: 2025-05-08 14:42:39 +03:00
# (151 lines, 4.5 KiB, Python)
# axc_ul/management/commands/load_survey.py
from datetime import datetime
from itertools import islice

import numpy as np
from astropy.io import fits
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.db.models import Max

from axc_ul.models import Pixel, Survey
# DEFINE BATCH SIZE AND BATCH
# **************************************************************
BATCH_SIZE = 1000000


def batch(iterable, size):
    """Yield successive lists of at most ``size`` items drawn from ``iterable``.

    The final chunk may be shorter than ``size``; an exhausted iterable
    yields nothing.
    """
    it = iter(iterable)
    # iter(callable, sentinel) keeps calling the lambda and stops as soon
    # as it returns the sentinel — i.e. the first empty chunk.
    yield from iter(lambda: list(islice(it, size)), [])
class Command(BaseCommand):
    """Load a pair of FITS healpix maps (counts + exposure) into Pixel rows.

    The command is resumable: it looks up the highest hpid already stored
    for the survey and continues from the next index, inserting in
    BATCH_SIZE chunks, each in its own transaction.
    """

    help = "Process FITS files and store the data in the database"

    # COMMAND LINE ARGUMENTS
    # **************************************************************
    def add_arguments(self, parser):
        """Register the required --counts, --exposure and --survey_number options."""
        parser.add_argument(
            '--counts',
            type=str,
            required=True,
            help='Path of the counts file'
        )
        parser.add_argument(
            '--exposure',
            type=str,
            required=True,
            help='Path of the exposure file'
        )
        parser.add_argument(
            '--survey_number',
            type=int,
            required=True,
            help='Integer ID of the survey being read'
        )

    @staticmethod
    def _load_map(path):
        """Open a FITS file and return the flattened map from column "T" of HDU 1."""
        with fits.open(path) as hdul:
            return hdul[1].data["T"].ravel()

    def handle(self, *args, **options):
        """Read both maps, validate them, and bulk-insert pixels for the survey.

        Raises:
            CommandError: if the two maps do not have the same shape.
        """
        # GET FILENAMES FROM ARGUMENTS
        # **************************************************************
        counts_file = options['counts']
        exposure_file = options['exposure']
        survey_number = options['survey_number']
        self.stdout.write(f"\nCounts file:\t{counts_file}")
        self.stdout.write(f"Exposure file:\t{exposure_file}")

        # OPEN BOTH FILES, RAVEL EACH
        # **************************************************************
        counts_data = self._load_map(counts_file)
        exposure_data = self._load_map(exposure_file)

        # COMPARE DATA SHAPES, ENSURE THEY'RE THE SAME
        # **************************************************************
        self.stdout.write(f"\nCounts Data Shape:\t{counts_data.shape}")
        self.stdout.write(f"Exposure Data Shape:\t{exposure_data.shape}")
        total_pixels = counts_data.shape[0]
        self.stdout.write(f"\nTotal pixels to insert:\t{total_pixels}")
        # Raise instead of assert so the check survives `python -O`.
        if counts_data.shape != exposure_data.shape:
            raise CommandError("Counts and exposure maps must have the same shape")

        # CREATE THE SURVEY IF IT DOES NOT EXIST
        # **************************************************************
        with transaction.atomic():
            survey, created = Survey.objects.get_or_create(number=survey_number)
        if created:
            self.stdout.write(f"Created a new survey instance with number: {survey.number}")
        else:
            self.stdout.write(f"Using existing survey instance with the number: {survey.number}")

        # FETCH THE LAST PROCESSED HPID AND CONTINUE FROM IT
        # **************************************************************
        last_hpid = (
            Pixel.objects
            .filter(survey=survey)
            .aggregate(max_hpid=Max('hpid'))['max_hpid']
        )
        # NOTE: `last_hpid or -1` would misbehave when the max hpid is 0
        # (0 is falsy), so test against None explicitly.
        start_index = last_hpid + 1 if last_hpid is not None else 0

        # Slice both arrays once instead of enumerating from zero and
        # discarding every index below start_index on each run.
        pixel_generator = (
            Pixel(
                hpid=i,
                counts=int(count),
                exposure=float(exposure),
                survey=survey
            )
            for i, (count, exposure) in enumerate(
                zip(counts_data[start_index:], exposure_data[start_index:]),
                start=start_index,
            )
        )

        processed = start_index   # rows already present from earlier runs
        inserted = 0              # rows actually inserted by this run
        # Process in batches, one transaction per batch so a failure only
        # rolls back the current chunk and the command stays resumable.
        for pixel_batch in batch(pixel_generator, BATCH_SIZE):
            with transaction.atomic():
                Pixel.objects.bulk_create(pixel_batch)
            inserted += len(pixel_batch)
            processed += len(pixel_batch)
            percentage = processed / total_pixels * 100
            timestamp = datetime.now().strftime("%H:%M:%S")
            self.stdout.write(
                f"[{timestamp}] {percentage:.2f}% inserted"
            )
        self.stdout.write(f"Inserted a total of {inserted} pixels.")