# uplim/management/commands/load_survey.py
# (157 lines, 4.8 KiB, Python)
# uplim/management/commands/load_survey.py
from datetime import datetime
from itertools import islice

import numpy as np
from astropy.io import fits
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.db.models import Max

from uplim.models import Pixel, Survey
# BATCHING HELPER
# **************************************************************
def batch(iterable, size):
    """Yield consecutive lists of at most `size` items from `iterable`.

    The final list may be shorter when the number of items is not an
    exact multiple of `size`; an exhausted iterable yields nothing.
    """
    source = iter(iterable)
    chunk = list(islice(source, size))
    while chunk:
        yield chunk
        chunk = list(islice(source, size))
class Command(BaseCommand):
    """Load a counts/exposure FITS map pair into the ``Pixel`` table.

    The command reads two HEALPix maps of identical shape, then bulk-inserts
    one ``Pixel`` row per map element for the given survey.  It resumes
    automatically from the highest ``hpid`` already stored for that survey,
    so an interrupted run can simply be restarted with the same arguments.
    """

    help = "Process FITS files and store the data in the database"

    # COMMAND LINE ARGUMENTS
    # **************************************************************
    def add_arguments(self, parser):
        parser.add_argument(
            '--counts',
            type=str,
            required=True,
            help='Path of the counts file'
        )
        parser.add_argument(
            '--exposure',
            type=str,
            required=True,
            help='Path of the exposure file'
        )
        parser.add_argument(
            '--survey_number',
            type=int,
            required=True,
            help='Integer ID of the survey being read'
        )
        parser.add_argument(
            '--batch_size',
            type=int,
            default=1000,
            help='Integer number of pixels to be inserted into the database at once'
        )

    def handle(self, *args, **options):
        # GET FILENAMES FROM ARGUMENTS
        # **************************************************************
        counts_file = options['counts']
        exposure_file = options['exposure']
        survey_number = options['survey_number']
        batch_size = options['batch_size']

        self.stdout.write(f"\nCounts file:\t{counts_file}")
        self.stdout.write(f"Exposure file:\t{exposure_file}")

        # OPEN BOTH FILES, RAVEL EACH
        # **************************************************************
        # Both maps are stored in column "T" of the first FITS extension;
        # ravel() flattens whatever nesting the column carries into 1-D.
        with fits.open(counts_file) as hdul:
            counts_data = hdul[1].data["T"].ravel()
        with fits.open(exposure_file) as hdul:
            exposure_data = hdul[1].data["T"].ravel()

        # COMPARE DATA SHAPES, ENSURE THEY'RE THE SAME
        # **************************************************************
        # Raise CommandError instead of using `assert`: assertions are
        # stripped when Python runs with -O, which would silently skip
        # this validation.
        if counts_data.shape != exposure_data.shape:
            raise CommandError(
                "Counts and exposure maps must have the same shape "
                f"({counts_data.shape} vs {exposure_data.shape})"
            )

        self.stdout.write(f"\nCounts Data Shape:\t{counts_data.shape}")
        self.stdout.write(f"Exposure Data Shape:\t{exposure_data.shape}")
        total_pixels = counts_data.shape[0]
        self.stdout.write(f"\nTotal pixels to insert:\t{total_pixels}")

        # CREATE THE SURVEY IF IT DOES NOT EXIST
        # **************************************************************
        with transaction.atomic():
            survey, created = Survey.objects.get_or_create(number=survey_number)
        if created:
            self.stdout.write(f"Created a new survey instance with number: {survey.number}")
        else:
            self.stdout.write(f"Using existing survey instance with the number: {survey.number}")

        # FETCH THE LAST PROCESSED HPID AND CONTINUE FROM IT
        # **************************************************************
        # NOTE: test `is None` explicitly — `max_hpid or -1` would treat a
        # legitimate max hpid of 0 as "no pixels yet" and re-insert pixel 0.
        last_hpid = (
            Pixel.objects
            .filter(survey=survey)
            .aggregate(max_hpid=Max('hpid'))['max_hpid']
        )
        start_index = 0 if last_hpid is None else last_hpid + 1

        # Skip the already-inserted prefix with islice (C-level skipping)
        # instead of filtering every index through a Python comparison.
        remaining = islice(zip(counts_data, exposure_data), start_index, None)
        pixel_generator = (
            Pixel(
                hpid=i,
                counts=int(count),
                exposure=float(exposure),
                survey=survey
            )
            for i, (count, exposure) in enumerate(remaining, start=start_index)
        )

        total_inserted = start_index
        # Process in batches; each batch is committed in its own
        # transaction so an interrupted run loses at most one batch.
        for pixel_batch in batch(pixel_generator, batch_size):
            with transaction.atomic():
                Pixel.objects.bulk_create(pixel_batch)
            total_inserted += len(pixel_batch)
            # Guard the progress report against an empty map (total_pixels
            # == 0 would otherwise divide by zero).
            percentage = (total_inserted / total_pixels * 100) if total_pixels else 100.0
            timestamp = datetime.now().strftime("%H:%M:%S")
            self.stdout.write(
                f"[{timestamp}] {percentage:.2f}% inserted"
            )
        self.stdout.write(f"Inserted a total of {total_inserted} pixels.")