upper limit NaN fix; ECF & EEF values update

Никита Тырин 2025-05-13 15:38:03 +03:00
parent d46e5e5fa6
commit ffaa663fdd
3 changed files with 222 additions and 21 deletions


@ -0,0 +1,176 @@
# uplim/management/commands/set_contaminated.py
# add custom flux-radius mapping?
# add specifying the columns?
# do contamination setting per survey?
# include nside for surveys?
from django.core.management.base import BaseCommand
import pandas as pd
import healpy as hp
import numpy as np
from astropy.coordinates import SkyCoord
from uplim.models import Pixel
class Command(BaseCommand):
    help = "Set the 'contaminated' flag for all pixels based on the fluxes in the provided catalog."

    # COMMAND LINE ARGUMENTS
    # **********************
    def add_arguments(self, parser):
        parser.add_argument(
            '--catalog',
            type=str,
            required=False,
            help='Path to the catalog.dat file'
        )
        # parser.add_argument(
        #     '--survey',
        #     type=int,
        #     required=False,
        #     help='integer number of the survey to set the flag for'
        # )
        parser.add_argument(
            '--reset',
            action='store_true',
            default=False,
            help='Reset the contamination flag across all pixels back to False.'
        )

    def handle(self, *args, **options):

        # RESET BEHAVIOR: SET CONTAMINATION FLAG TO FALSE FOR ALL PIXELS
        # **************************************************************
        if options['reset']:
            self.stdout.write("Resetting the contamination flag...")
            Pixel.objects.update(contaminated=False)
            self.stdout.write("Done")
            return

        if not options['catalog']:
            self.stdout.write("No catalog file provided, exiting")
            return

        catalog_file = options['catalog']
        self.stdout.write(f"Catalog file:\t{catalog_file}")

        # READ THE CATALOG FILE USING PANDAS READ_FWF
        # *******************************************
        # Define column positions based on the byte ranges in the table
        colspecs = [
            (0, 4),      # SrcID   (1-4)
            (5, 26),     # Name    (6-26)
            (27, 37),    # RAdeg   (28-37)
            (38, 48),    # DEdeg   (39-48)
            (49, 55),    # ePos    (50-55)
            (56, 63),    # Signi   (57-63)
            (64, 76),    # Flux    (65-76)
            (77, 89),    # e_Flux  (78-89)
            (90, 118),   # CName   (91-118)
            (119, 120),  # NewXray (120)
            (121, 134)   # Type    (122-134)
        ]

        # Define column names
        colnames = [
            "SrcID", "Name", "RAdeg", "DEdeg", "ePos", "Signi", "Flux",
            "e_Flux", "CName", "NewXray", "Type"
        ]

        # Read the file using the fixed-width format
        catalog = pd.read_fwf(catalog_file, colspecs=colspecs, names=colnames)
        self.stdout.write(str(catalog.head()))

        # hard-coded nside and flux-radius mapping
        # maybe change that
        nside = 4096
        npix = hp.nside2npix(nside)

        flux_bins = [0, 125, 250, 2000, 20000, np.inf]  # bin edges
        mask_radii_deg = [0.06, 0.15, 0.5, 0.9, 2.5]    # corresponding mask radii in degrees

        # Convert mask radii from degrees to radians (required by query_disc)
        mask_radii = [np.radians(r) for r in mask_radii_deg]

        # Use pandas.cut to assign each source a bin index (0-4)
        catalog['flux_bin'] = pd.cut(catalog['Flux'], bins=flux_bins, labels=False)

        # manually add and change some sources
        manual_additions = pd.DataFrame(
            [
                {'RAdeg': 279.9804336, 'DEdeg': 5.0669542,   'flux_bin': 3},
                {'RAdeg': 266.5173685, 'DEdeg': -29.1252321, 'flux_bin': 3},
            ]
        )
        catalog = pd.concat([catalog, manual_additions], ignore_index=True)
        catalog.loc[catalog['SrcID'] == 1101, 'flux_bin'] = 2

        mask_array = np.ones(npix, dtype=bool)
        masked_pixels_set = set()

        self.stdout.write("\nCreating a list of contaminated pixels...")

        # process each source in the catalog
        for _, row in catalog.iterrows():

            ra = row['RAdeg']
            dec = row['DEdeg']
            src_coord = SkyCoord(
                ra, dec, unit='deg', frame='icrs'
            )

            # the pixel grid is indexed in galactic coordinates, so convert first
            gal = src_coord.galactic
            lon, lat = gal.l.deg, gal.b.deg

            # bin index 0-4; cast in case pandas stored it as a float
            flux_bin = int(row['flux_bin'])

            # Get the corresponding mask radius (in radians) for this flux bin
            radius = mask_radii[flux_bin]

            # Convert (lon, lat) to HEALPix spherical coordinates
            theta = np.radians(90.0 - lat)
            phi = np.radians(lon)
            vec = hp.ang2vec(theta, phi)

            # Query all pixels within the given radius
            # 'inclusive=True' makes sure pixels on the edge are included
            pix_indices = hp.query_disc(nside, vec, radius, inclusive=True)

            # Mark these pixels as bad (False) in our mask
            mask_array[pix_indices] = False

            # Add the pixel indices to our set of masked pixels
            masked_pixels_set.update(pix_indices)

        # Convert the set of masked pixels to a sorted list
        masked_pixels_list = sorted(masked_pixels_set)
        # print("Number of masked pixels:", len(masked_pixels_list))

        self.stdout.write("\nList ready, updating the database...")

        if masked_pixels_list:
            Pixel.objects.filter(hpid__in=masked_pixels_list).update(contaminated=True)
            self.stdout.write(f"\nMarked {len(masked_pixels_list)} pixels as contaminated.")
        else:
            self.stdout.write("No pixels marked as contaminated, exiting.")


@ -10,12 +10,8 @@ class Survey(models.Model):
     number = models.IntegerField(unique=True)
-    # nside = models.IntegerField(
-    #     default=4096
-    # )
     def __str__(self):
-        return f"Survey {self.number} of NSIDE {self.nside}"
+        return f"Survey {self.number}"
@ -23,7 +19,7 @@ class Survey(models.Model):
 class Pixel(models.Model):
-    id = models.AutoField(primary_key=True)
+    # id = models.AutoField(primary_key=True)  # ~2 million pixels for a 4096 survey
     survey = models.ForeignKey(
         Survey,
@ -32,17 +28,17 @@ class Pixel(models.Model):
         default=0
     )
-    hpid = models.IntegerField(db_index=True)
-    counts = models.IntegerField()
-    exposure = models.FloatField()
+    hpid = models.IntegerField(db_index=True)  # up to over 200 million
+    counts = models.IntegerField()             # f4, up to ~44k: a 2-byte integer would be too small
+    exposure = models.FloatField()             # f4, up to ~13300
     contaminated = models.BooleanField(default=False)
     class Meta:
         constraints = [
-            UniqueConstraint(fields=['survey', 'hpid'], name='unique_hpid_per_survey'),
+            UniqueConstraint(fields=['survey','hpid'], name='unique_hpid_per_survey'),
         ]
     def __str__(self):
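
For reference (illustrative, not part of the commit): the contaminated flag on Pixel is what set_contaminated.py above toggles, and downstream queries can exclude flagged pixels with an ordinary queryset filter, for example:

from uplim.models import Pixel

# pixels of survey number 1 that have not been flagged as contaminated
clean_pixels = Pixel.objects.filter(survey__number=1, contaminated=False)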


@ -5,6 +5,7 @@
 import healpy as hp
 import astropy.units as u
 from astropy.coordinates import SkyCoord
+import numpy as np
 import scipy.special as sp
 from astropy.stats import poisson_conf_interval
@ -18,6 +19,22 @@ from django.shortcuts import get_object_or_404
 from uplim.models import Pixel
+
+# SANITIZE RESPONSE DATA BEFORE JSON CONVERSION FOR DEBUGGING NANS
+# now NaNs are converted to 'null' beforehand
+# ****************************************************************
+def sanitize(obj):
+    if isinstance(obj, dict):
+        return {k: sanitize(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [sanitize(v) for v in obj]
+    # handle numpy scalars
+    if isinstance(obj, np.generic):
+        v = obj.item()
+        return None if (np.isnan(v) or np.isinf(v)) else v
+    if isinstance(obj, float):
+        return None if (np.isnan(obj) or np.isinf(obj)) else obj
+    return obj
+
 # SURVEY PARAMETER PARSER
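
As an aside (not part of the commit), the sanitize helper above recursively replaces non-finite floats and numpy scalars with None, which the JSON renderer then serializes as null. A quick illustration, assuming numpy is imported as np as in the diff:

payload = {'FluxUpperLimit': np.float64('nan'), 'Counts': np.int64(5), 'Limits': [1.0, float('inf')]}
print(sanitize(payload))
# -> {'FluxUpperLimit': None, 'Counts': 5, 'Limits': [1.0, None]}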
@ -247,9 +264,11 @@ class UpperLimitView(APIView):
         # CONSTANTS
         # **************************************************************
-        EEF = .9     # enclosed energy fraction, .5 for hpd, .9 for w90
-        ECF = 4e-11  # energy conversion factor
+        #EEF = .9     # enclosed energy fraction, .5 for hpd, .9 for w90
+        #ECF = 4e-11  # energy conversion factor
+        EEF = .80091         # use values from the paper
+        ECF = 3.3423184e-11
 
         # BAYESIAN IMPLEMENTATION VIA POISSON_CONF_INTERVAL
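
The poisson_conf_interval call that produces the Bayesian count limits sits outside the changed hunks and is not shown in this diff. For orientation only, a minimal sketch of how such limits are typically obtained with astropy; the inputs are placeholder values, not ones confirmed by this commit:

from astropy.stats import poisson_conf_interval

# Illustrative inputs: total counts in the aperture, background estimate,
# and the requested confidence level (placeholders)
N, B, confidence_level = 5, 1.3, 0.95

# Kraft-Burrows-Nousek Bayesian interval on the source counts
bayesian_count_ll, bayesian_count_ul = poisson_conf_interval(
    N, background=B, interval='kraft-burrows-nousek',
    confidence_level=confidence_level,
)
print(bayesian_count_ll, bayesian_count_ul)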
@ -268,7 +287,7 @@ class UpperLimitView(APIView):
         bayesian_rate_ul = bayesian_count_ul / t / EEF  # count rate limits
         bayesian_rate_ll = bayesian_count_ll / t / EEF
         bayesian_flux_ul = bayesian_rate_ul * ECF       # flux limits
         bayesian_flux_ll = bayesian_rate_ll * ECF
 
         # CLASSICAL IMPLEMENTATION VIA GAMMAINCCINV
@ -277,28 +296,32 @@ class UpperLimitView(APIView):
         classic_count_ul = sp.gammainccinv(N+1, 1 - confidence_level) - B
         classic_count_ll = sp.gammainccinv(N, confidence_level) - B
-        classic_count_ll = max(classic_count_ll, 0)
+        if not np.isfinite(classic_count_ll) or classic_count_ll < 0:
+            classic_count_ll = 0.0
 
         classic_rate_ul = classic_count_ul / t / EEF  # count rate limits
         classic_rate_ll = classic_count_ll / t / EEF
         classic_flux_ul = classic_rate_ul * ECF       # flux limits
         classic_flux_ll = classic_rate_ll * ECF
 
         # FLUX ESTIMATION
         # ****************************************************************
         S = N - B          # counts as simply counts within aperture
                            # with the background estimate subtracted
         CR = S / t / EEF   # count rate
         FL = CR * ECF      # conversion to flux
         Flux = max(FL, 0)  # flux cannot be lower than zero
 
-        return Response({
+        # RESULT ASSEMBLY
+        # ****************************************************************
+        result = {
             'ClassicUpperLimit'  : classic_count_ul,
             'ClassicLowerLimit'  : classic_count_ll,
@ -316,6 +339,8 @@ class UpperLimitView(APIView):
             'FluxEstimate'       : Flux,
+            # raw counts and exposure omitted from the response
             # 'N'       : N,
             # 'Nnpix'   : Nnpix,
             # 'Bcounts' : Bcounts,
@ -324,4 +349,8 @@ class UpperLimitView(APIView):
             # 'tsum' : tsum,
             # 't'    : t
-        }, status=status.HTTP_200_OK)
+        }
+
+        clean = sanitize(result)  # calling sanitize() to convert NaN to null
+        return Response(clean, status=status.HTTP_200_OK)
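
For context on the "NaN fix" in the commit title (an editorial note, not part of the diff): the classical lower limit sp.gammainccinv(N, confidence_level) - B can come out negative for small N and is not well defined for N = 0, which is presumably how NaN reached the JSON response before this commit. A standalone sketch of the guarded computation with placeholder inputs:

import numpy as np
import scipy.special as sp

def classic_lower_limit(N, B, confidence_level):
    # same guard as introduced above: clamp non-finite or negative limits to zero
    ll = sp.gammainccinv(N, confidence_level) - B
    if not np.isfinite(ll) or ll < 0:
        ll = 0.0
    return ll

print(classic_lower_limit(N=0, B=0.3, confidence_level=0.95))  # 0.0 once the guard kicks in
print(classic_lower_limit(N=8, B=2.1, confidence_level=0.95))  # finite and positive in this example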