# uplim/management/commands/load_survey.py
#
# 164 lines
# 5.0 KiB
# Python

# uplim/management/commands/load_survey.py
import numpy as np
from astropy.io import fits
from itertools import islice
from datetime import datetime
from django.core.management.base import BaseCommand
from django.db import transaction
from uplim.models import Pixel
from django.db.models import Max
from tqdm import tqdm
import sys
# BATCHING HELPER
# **************************************************************
# The batch size is supplied at runtime via the --batch_size option
# (previously a module constant: BATCH_SIZE = 1000000).
def batch(iterable, size):
    """
    Generator that yields successive chunks of size 'size' from 'iterable'.

    Each chunk is a list; the last one may be shorter than 'size'. The input
    is consumed lazily, so it may itself be a generator.

    Raises ValueError (on first iteration) if 'size' is smaller than 1.
    """
    # A size of 0 would make the first chunk empty and end the generator
    # immediately, silently dropping every item; negative sizes are already
    # rejected by islice. Reject both the same way.
    if size < 1:
        raise ValueError(f"batch size must be a positive integer, got {size!r}")
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, size))
        if not chunk:
            return
        yield chunk
class Command(BaseCommand):
    """
    Management command that loads one survey's counts and exposure maps
    from two FITS files and stores them as Pixel rows.

    Both files are read from column "T" of HDU 1 and flattened; element i of
    the flattened arrays becomes the Pixel with hpid=i. The command is
    resumable: it continues after the highest hpid already stored for the
    given survey number.
    """

    help = "Process FITS files and store the data in the database"

    # COMMAND LINE ARGUMENTS
    # **************************************************************
    def add_arguments(self, parser):
        """Register --counts, --exposure, --survey_number and --batch_size."""
        parser.add_argument(
            "--counts", type=str, required=True, help="Path of the counts file"
        )
        parser.add_argument(
            "--exposure", type=str, required=True, help="Path of the exposure file"
        )
        parser.add_argument(
            "--survey_number",
            type=int,
            required=True,
            help="Integer ID of the survey being read",
        )
        parser.add_argument(
            "--batch_size",
            type=int,
            default=1000,
            help="Integer number of pixels to be inserted into the database at once",
        )

    @staticmethod
    def _read_flat_column(path, column_name="T"):
        """Return column 'column_name' of HDU 1 in FITS file 'path', flattened to 1-D."""
        with fits.open(path) as hdul:
            # Flatten while the file is still open.
            return hdul[1].data[column_name].ravel()

    def handle(self, *args, **options):
        """
        Read both maps, check that they match, and bulk-insert the pixels
        that are not yet stored for this survey.

        Raises AssertionError if the two flattened maps differ in shape.
        """
        # GET FILENAMES FROM ARGUMENTS
        # **************************************************************
        counts_file = options["counts"]
        exposure_file = options["exposure"]
        survey_number = options["survey_number"]
        batch_size = options["batch_size"]

        self.stdout.write(f"\nCounts file:\t{counts_file}")
        self.stdout.write(f"Exposure file:\t{exposure_file}")

        # OPEN BOTH FILES, RAVEL EACH
        # **************************************************************
        counts_data = self._read_flat_column(counts_file)
        exposure_data = self._read_flat_column(exposure_file)

        # COMPARE DATA SHAPES, ENSURE THEY'RE THE SAME
        # **************************************************************
        self.stdout.write(f"\nCounts Data Shape:\t{counts_data.shape}")
        self.stdout.write(f"Exposure Data Shape:\t{exposure_data.shape}")
        total_pixels = counts_data.shape[0]
        self.stdout.write(f"\nTotal pixels to insert:\t{total_pixels}")

        # Raised explicitly rather than via `assert`, which is stripped under
        # `python -O`; zip() below would then silently truncate to the
        # shorter map.
        if counts_data.shape != exposure_data.shape:
            raise AssertionError("Counts and exposure maps must have the same shape")

        # NOTE: no Survey row is created here; each Pixel is tagged with the
        # raw survey number.

        # FETCH THE LAST PROCESSED HPID AND CONTINUE FROM IT
        # **************************************************************
        # Assumes earlier runs inserted hpids contiguously from 0, so the
        # maximum stored hpid marks where to resume.
        max_hpid = Pixel.objects.filter(survey=survey_number).aggregate(
            max_hpid=Max("hpid")
        )["max_hpid"]
        # Test for None explicitly: a stored maximum of 0 is falsy, so
        # `max_hpid or -1` would restart at 0 and insert that pixel twice.
        start_index = 0 if max_hpid is None else max_hpid + 1

        pixels_to_insert = total_pixels - start_index
        if pixels_to_insert <= 0:
            self.stdout.write("All pixels have already been inserted. Exiting.")
            return

        # Slice off the already-stored prefix instead of iterating over it
        # and discarding each element; enumerate keeps hpid aligned with the
        # position in the full flattened map.
        pixel_generator = (
            Pixel(
                hpid=hpid,
                counts=int(count),
                exposure=float(exposure),
                survey=survey_number,
            )
            for hpid, (count, exposure) in enumerate(
                zip(counts_data[start_index:], exposure_data[start_index:]),
                start=start_index,
            )
        )

        # Counts rows present for this survey: those found at start-up plus
        # those inserted by this run.
        total_inserted = start_index

        # Process in batches; the context manager closes the bar even if an
        # insert raises.
        with tqdm(
            total=pixels_to_insert,
            unit="pix",
            desc=f"Survey {survey_number}",
        ) as pbar:
            for pixel_batch in batch(pixel_generator, batch_size):
                # One transaction per batch, so an interrupted run leaves
                # only whole batches behind and can be resumed.
                with transaction.atomic():
                    Pixel.objects.bulk_create(pixel_batch)
                inserted = len(pixel_batch)
                total_inserted += inserted
                # The final batch may be shorter than batch_size.
                pbar.update(inserted)

        self.stdout.write(f"Done: Inserted a total of {total_inserted} pixels.")