code formatting

2025-05-19 15:09:04 +03:00
parent cf3213a0f9
commit 26f848d274
6 changed files with 373 additions and 417 deletions

View File

@@ -1,21 +1,21 @@
 # uplim/management/commands/load_survey.py

 import numpy as np
 from astropy.io import fits
+from itertools import islice
+from datetime import datetime

 from django.core.management.base import BaseCommand
 from django.db import transaction
 from uplim.models import Pixel
 from django.db.models import Max
-from itertools import islice
-from datetime import datetime

 # DEFINE BATCH SIZE AND BATCH
 # **************************************************************

-#BATCH_SIZE = 1000000
+# BATCH_SIZE = 1000000


 def batch(iterable, size):
     """
@@ -29,8 +29,6 @@ def batch(iterable, size):
         yield chunk
-
-


 class Command(BaseCommand):
     help = "Process FITS files and store the data in the database"
@@ -39,40 +37,33 @@ class Command(BaseCommand):
     def add_arguments(self, parser):
         parser.add_argument(
-            '--counts',
-            type=str,
-            required=True,
-            help='Path of the counts file'
+            "--counts", type=str, required=True, help="Path of the counts file"
         )
         parser.add_argument(
-            '--exposure',
-            type=str,
-            required=True,
-            help='Path of the exposure file'
+            "--exposure", type=str, required=True, help="Path of the exposure file"
         )
         parser.add_argument(
-            '--survey_number',
+            "--survey_number",
             type=int,
             required=True,
-            help='Integer ID of the survey being read'
+            help="Integer ID of the survey being read",
         )
         parser.add_argument(
-            '--batch_size',
+            "--batch_size",
             type=int,
             default=1000,
-            help='Integer number of pixels to be inserted into the database at once'
+            help="Integer number of pixels to be inserted into the database at once",
        )

     def handle(self, *args, **options):
         # GET FILENAMES FROM ARGUMENTS
         # **************************************************************
-        counts_file = options['counts']
-        exposure_file = options['exposure']
-        survey_number = options['survey_number']
-        BATCH_SIZE = options['batch_size']
+        counts_file = options["counts"]
+        exposure_file = options["exposure"]
+        survey_number = options["survey_number"]
+        BATCH_SIZE = options["batch_size"]

         self.stdout.write(f"\nCounts file:\t{counts_file}")
         self.stdout.write(f"Exposure file:\t{exposure_file}")
@@ -87,7 +78,6 @@ class Command(BaseCommand):
             counts_data = counts_map.ravel()
-
         with fits.open(exposure_file) as hdul:
             column_name = "T"
@@ -100,49 +90,48 @@ class Command(BaseCommand):
         self.stdout.write(f"\nCounts Data Shape:\t{counts_data.shape}")
         self.stdout.write(f"Exposure Data Shape:\t{exposure_data.shape}")

         total_pixels = counts_data.shape[0]
         self.stdout.write(f"\nTotal pixels to insert:\t{total_pixels}")

-        assert counts_data.shape == exposure_data.shape, "Counts and exposure maps must have the same shape"
+        assert (
+            counts_data.shape == exposure_data.shape
+        ), "Counts and exposure maps must have the same shape"

         # CREATE THE SURVEY IF IT DOES NOT EXIST
         # **************************************************************

         # with transaction.atomic():
         #     survey,created = Survey.objects.get_or_create(number=survey_number)
         #     if created:
         #         self.stdout.write(f"Created a new survey instance with number: {survey.number}")
         #     else:
         #         self.stdout.write(f"Using existing survey instance with the number: {survey.number}")

         # FETCH THE LAST PROCESSED HPID AND CONTINUE FROM IT
         # **************************************************************

         last_hpid = (
-            Pixel.objects
-            .filter(survey=survey_number)
-            .aggregate(max_hpid=Max('hpid'))['max_hpid']
+            Pixel.objects.filter(survey=survey_number).aggregate(max_hpid=Max("hpid"))[
+                "max_hpid"
+            ]
             or -1
         )

         start_index = last_hpid + 1

         pixel_generator = (
             Pixel(
                 hpid=i,
                 counts=int(count),
                 exposure=float(exposure),
-                survey=survey_number
+                survey=survey_number,
             )
             for i, (count, exposure) in enumerate(zip(counts_data, exposure_data))
             if i >= start_index
         )

         total_inserted = start_index

         # Process in batches
         for pixel_batch in batch(pixel_generator, BATCH_SIZE):
@@ -151,8 +140,6 @@ class Command(BaseCommand):
             total_inserted += len(pixel_batch)
             percentage = total_inserted / total_pixels * 100
             timestamp = datetime.now().strftime("%H:%M:%S")
-            self.stdout.write(
-                f"[{timestamp}] {percentage:.2f}% inserted"
-            )
+            self.stdout.write(f"[{timestamp}] {percentage:.2f}% inserted")

         self.stdout.write(f"Inserted a total of {total_inserted} pixels.")

View File

@@ -7,19 +7,19 @@
 from django.core.management.base import BaseCommand
 from django.db import transaction
-from uplim.models import Pixel, CatalogSource

 import pandas as pd
 import healpy as hp
 import numpy as np

 from astropy.coordinates import SkyCoord

+from uplim.models import Pixel, CatalogSource

 from itertools import islice
 from datetime import datetime

-BATCH_SIZE=900
+BATCH_SIZE = 900


 def batch(iterable, size):
     iterable = iter(iterable)
@@ -28,157 +28,165 @@ def batch(iterable, size):
         if not chunk:
             break
         yield chunk


 class Command(BaseCommand):
     help = "Set the 'contaminated' flag for all pixels based on the fluxes in the provided catalog."

     # COMMAND LINE ARGUMENTS
     # **********************
     def add_arguments(self, parser):
         parser.add_argument(
-            '--catalog',
-            type=str,
-            required=False,
-            help='Path to the catalog.dat file'
+            "--catalog", type=str, required=False, help="Path to the catalog.dat file"
         )
         # parser.add_argument(
         #     '--survey',
         #     type=int,
         #     required=False,
         #     help='integer number of the survey to set the flag for'
         # )
         parser.add_argument(
-            '--reset',
-            action='store_true',
+            "--reset",
+            action="store_true",
             default=False,
-            help='Reset the contamination flag across all pixels back to False.'
+            help="Reset the contamination flag across all pixels back to False.",
         )

     def handle(self, *args, **options):
         # RESET BEHAVIOR: SET CONTAMINATION FLAG TO FALSE FOR ALL PIXELS
         # **************************************************************
-        if options['reset']:
+        if options["reset"]:
             self.stdout.write("Resetting the contamination flag...")
-            Pixel.objects.update(contaminated = False)
+            Pixel.objects.update(contaminated=False)
             self.stdout.write("Done")
             return

-        if not options['catalog']:
+        if not options["catalog"]:
             self.stdout.write("No catalog file provided, exiting")
             return

-        catalog_file = options['catalog']
+        catalog_file = options["catalog"]
         self.stdout.write(f"Catalog file:\t{catalog_file}")

         # READ THE CATALOG FILE USING PANDAS READ_FWF
         # *******************************************
         # Define column positions based on the byte ranges
         colspecs = [
-            (0, 4), # SrcID (1-4)
-            (5, 26), # Name (6-26)
+            (0, 4),  # SrcID (1-4)
+            (5, 26),  # Name (6-26)
             (27, 37),  # RAdeg (28-37)
             (38, 48),  # DEdeg (39-48)
             (49, 55),  # ePos (50-55)
             (56, 63),  # Signi (57-63)
             (64, 76),  # Flux (65-76)
             (77, 89),  # e_Flux (78-89)
-            (90, 118), # CName (91-118)
-            (119, 120),# NewXray (120)
-            (121, 134) # Type (122-134)
+            (90, 118),  # CName (91-118)
+            (119, 120),  # NewXray (120)
+            (121, 134),  # Type (122-134)
         ]

         # Define column names
         colnames = [
-            "SrcID", "Name", "RAdeg", "DEdeg", "ePos", "Signi", "Flux",
-            "e_Flux", "CName", "NewXray", "Type"
+            "SrcID",
+            "Name",
+            "RAdeg",
+            "DEdeg",
+            "ePos",
+            "Signi",
+            "Flux",
+            "e_Flux",
+            "CName",
+            "NewXray",
+            "Type",
         ]

         # Read the file using the fixed-width format
         catalog = pd.read_fwf(catalog_file, colspecs=colspecs, names=colnames)

-        for col in ['Name', 'CName', 'Type']:
-            catalog[col] = catalog[col].fillna('')
+        for col in ["Name", "CName", "Type"]:
+            catalog[col] = catalog[col].fillna("")

         self.stdout.write(str(catalog.head()))

         # LOAD THE CATALOG INTO THE DATABASE
         # **********************************
-        existing_srcids = set(
-            CatalogSource.objects.values_list('srcid', flat=True)
-        )
+        existing_srcids = set(CatalogSource.objects.values_list("srcid", flat=True))

         to_create = []

         for _, row in catalog.iterrows():
-            srcid = int(row['SrcID'])
+            srcid = int(row["SrcID"])
             if srcid in existing_srcids:
                 continue
             to_create.append(
                 CatalogSource(
-                    srcid = srcid,
-                    name = row['Name'].strip(),
-                    ra_deg = float(row['RAdeg']),
-                    dec_deg = float(row['DEdeg']),
-                    pos_error = float(row['ePos']),
-                    significance = float(row['Signi']),
-                    flux = float(row['Flux']),
-                    flux_error = float(row['e_Flux']),
-                    catalog_name = row['CName'].strip(),
-                    new_xray = bool(int(row['NewXray'])),
-                    source_type = row['Type'].strip()
+                    srcid=srcid,
+                    name=row["Name"].strip(),
+                    ra_deg=float(row["RAdeg"]),
+                    dec_deg=float(row["DEdeg"]),
+                    pos_error=float(row["ePos"]),
+                    significance=float(row["Signi"]),
+                    flux=float(row["Flux"]),
+                    flux_error=float(row["e_Flux"]),
+                    catalog_name=row["CName"].strip(),
+                    new_xray=bool(int(row["NewXray"])),
+                    source_type=row["Type"].strip(),
                 )
             )

         if to_create:
-            self.stdout.write(f'Inserting {len(to_create)} new catalog rows.')
+            self.stdout.write(f"Inserting {len(to_create)} new catalog rows.")
             for chunk in batch(to_create, BATCH_SIZE):
                 CatalogSource.objects.bulk_create(chunk, ignore_conflicts=True)
-            self.stdout.write('Catalog update complete.')
+            self.stdout.write("Catalog update complete.")
         else:
-            self.stdout.write('All catalog rows already exist in the database.')
+            self.stdout.write("All catalog rows already exist in the database.")

         # hard coded nside and flux-radius mapping
         # maybe change that
         nside = 4096
         npix = hp.nside2npix(nside)

-        flux_bins = [0, 125, 250, 2000, 20000, np.inf] # define bin edges
-        mask_radii_deg = [ 0.06, 0.15, 0.5, 0.9, 2.5 ] # corresponding mask radii in degrees
+        flux_bins = [0, 125, 250, 2000, 20000, np.inf]  # define bin edges
+        mask_radii_deg = [
+            0.06,
+            0.15,
+            0.5,
+            0.9,
+            2.5,
+        ]  # corresponding mask radii in degrees

         # Convert mask radii from degrees to radians (required by query_disc)
         mask_radii = [np.radians(r) for r in mask_radii_deg]

         # Use pandas.cut to assign each source a bin index (0, 1, or 2)
-        catalog['flux_bin'] = pd.cut(catalog['Flux'], bins=flux_bins, labels=False)
+        catalog["flux_bin"] = pd.cut(catalog["Flux"], bins=flux_bins, labels=False)

         # manually add and change some sources
         manual_additions = pd.DataFrame(
             [
-                {'RAdeg' : 279.9804336, 'DEdeg' : 5.0669542, 'flux_bin' : 3},
-                {'RAdeg' : 266.5173685, 'DEdeg' : -29.1252321, 'flux_bin' : 3},
+                {"RAdeg": 279.9804336, "DEdeg": 5.0669542, "flux_bin": 3},
+                {"RAdeg": 266.5173685, "DEdeg": -29.1252321, "flux_bin": 3},
             ]
         )

         catalog = pd.concat([catalog, manual_additions], ignore_index=True)

-        catalog.loc[catalog['SrcID'] == 1101, 'flux_bin'] = 2
+        catalog.loc[catalog["SrcID"] == 1101, "flux_bin"] = 2

         mask_array = np.ones(npix, dtype=bool)
@@ -188,62 +196,58 @@ class Command(BaseCommand):
         # process each source in the catalog
         for _, row in catalog.iterrows():
-            ra = row['RAdeg']
-            dec = row['DEdeg']
-            src_coord = SkyCoord(
-                ra, dec, unit = 'deg', frame = 'icrs'
-            )
+            ra = row["RAdeg"]
+            dec = row["DEdeg"]
+            src_coord = SkyCoord(ra, dec, unit="deg", frame="icrs")

             gal = src_coord.galactic
             ra, dec = gal.l.deg, gal.b.deg

-            flux_bin = row['flux_bin'] # 0, 1, or 2
+            flux_bin = row["flux_bin"]  # 0, 1, or 2

             # Get the corresponding mask radius (in radians) for this flux bin
             radius = mask_radii[flux_bin]

             # Convert (ra, dec) to HEALPix spherical coordinates
             theta = np.radians(90.0 - dec)
             phi = np.radians(ra)
             vec = hp.ang2vec(theta, phi)

             # Query all pixels within the given radius
             # 'inclusive=True' makes sure pixels on the edge are included
             pix_indices = hp.query_disc(nside, vec, radius, inclusive=True)

             # Mark these pixels as bad (False) in our mask
             mask_array[pix_indices] = False

             # Add the pixel indices to our set of masked pixels
             masked_pixels_set.update(pix_indices)

         # Convert the set of masked pixels to a sorted list.
         masked_pixels_list = sorted(list(masked_pixels_set))
         # print("Number of masked pixels:", len(masked_pixels_list))

         self.stdout.write("\nList ready, updating the database...")

         if not masked_pixels_list:
             self.stdout.write("No pixels marked as contaminated, exiting.")
             return

         total = len(masked_pixels_list)
         updated = 0

-        self.stdout.write(f'\nUpdating contaminated flag in batches of {BATCH_SIZE}')
+        self.stdout.write(f"\nUpdating contaminated flag in batches of {BATCH_SIZE}")

         for chunk in batch(masked_pixels_list, BATCH_SIZE):
             with transaction.atomic():
                 Pixel.objects.filter(hpid__in=chunk).update(contaminated=True)
             updated += len(chunk)
             percentage = updated / total * 100
             timestamp = datetime.now().strftime("%H:%M:%S")
-            self.stdout.write(f'[{timestamp}] {updated}/{total} ({percentage:.1f}%) updated')
-        self.stdout.write(f'\n Marked {updated} pixels as contaminated.')
+            self.stdout.write(
+                f"[{timestamp}] {updated}/{total} ({percentage:.1f}%) updated"
+            )
+        self.stdout.write(f"\n Marked {updated} pixels as contaminated.")