commit 2a759d92ef
Author: Andrey Mukhin
Date:   2022-08-30 15:10:28 +03:00
2 changed files with 445 additions and 0 deletions

get_region_archive_table.py (new file, 176 lines)

@@ -0,0 +1,176 @@
# %%
from get_region_pack import *
import numpy as np
import pandas as pd
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from multiprocessing import get_context
import warnings
import time
import os
warnings.filterwarnings('ignore')
# %%
def ellipse(array):
    # Centroid of the nonzero pixels and the full extent of the region along
    # x and y, interpreted as the axes of a bounding ellipse.
    grid = np.indices(array.shape)[::-1]
    center = [((curr_axis*array).sum()/array.sum()) for curr_axis in grid]
    y, x = np.where(array)
    return center, np.abs(x-center[0]).max()*2, np.abs(y-center[1]).max()*2
def mask_ellipse(array):
    # Boolean mask of the ellipse bounding the nonzero pixels of array.
    (center_x, center_y), len_x, len_y = ellipse(array)
    x, y = np.indices(array.shape)[::-1]
    radius = ((x-center_x)/(0.5*len_x))**2 + ((y-center_y)/(0.5*len_y))**2
    return radius <= 1
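# Example (a sketch): for a 5x5 array with ones at (row, col) = (1,1), (1,3),
# (3,1) and (3,3), ellipse() returns centre (2.0, 2.0) with both extents equal
# to 2, and mask_ellipse() marks the five pixels satisfying
# ((x-2)/1)**2 + ((y-2)/1)**2 <= 1.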
def poisson_divider(array):
    # Chi-squared-like statistic of a 360x360 masked image, computed per
    # 180x180 detector quadrant: sum((x - mean)**2 / mean) over unmasked
    # pixels, normalised by the total number of unmasked pixels.
    sub_arrays = np.zeros((4, 180, 180))
    for i in range(2):
        for j in range(2):
            sub_arrays[i+2*j] = array[180*i:180*(i+1), 180*j:180*(j+1)].filled(-1000)
    pix_sum = 0
    for layer in sub_arrays:
        _masked_array = np.ma.masked_less(layer, 0)
        pix_sum += ((_masked_array-_masked_array.mean())**2/_masked_array.mean()).sum()
    pix_sum /= np.logical_not(array.mask).sum()
    return pix_sum
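# For pure Poisson data the variance equals the mean, so sum((x-mean)**2/mean)
# over N unmasked pixels is ~N; after the normalisation above, a value near 1
# means the residual image is Poisson-like, while values well above 1 flag
# leftover source structure.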
def process(argument):
    # Worker: build a source mask for a single observation and return the
    # statistics row for the output table together with the mask itself.
    obs_idx, obs_name = argument
    bin_num = 6
    try:
        obs = Observation(obs_name)
        sky_coord = SkyCoord(ra=obs.ra*u.deg, dec=obs.dec*u.deg, frame='fk5').transform_to('galactic')
        lon, lat = sky_coord.l.value, sky_coord.b.value
        rem_signal, rem_area, poiss_comp, rms = np.zeros((4, 2**bin_num))
        region = np.zeros(obs.data.shape, dtype=bool)
        rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
        masked_obs = np.ma.masked_array(obs.data, mask=region)
        good_lvl = np.zeros(bin_num, dtype=bool)
        good_idx = 0
        # Mask quality: distance of the unmasked pixels from Poisson statistics,
        # penalised by the fraction of the field the mask removes.
        parameter = lambda idx: (poiss_comp[idx]**2 + ((1-rem_area[idx])*0.5)**2)
        if obs.exposure > 1000:
            wav_obs = obs.wavdecomp('gauss', (5, 3), occ_coeff=True)
            occ_coeff = obs.get_coeff()
            # Try every on/off combination of the wavelet layers and keep the
            # one that minimises the quality metric.
            for idx, lvl in enumerate(binary_array(bin_num)):
                try:
                    # region = np.array([layer>0 for layer in wav_obs[2:-1][lvl]]).sum(0).astype(bool)
                    region = wav_obs[2:-1][lvl].sum(0) > 0
                except ValueError:
                    region = np.zeros(obs.data.shape, dtype=bool)
                masked_obs = np.ma.masked_array(obs.data, mask=region)*occ_coeff
                rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
                rem_signal[idx] = 1 - obs.data[region].sum()/obs.data.sum()
                rem_area[idx] = 1 - rem_region.sum()/np.logical_not(obs.data.mask).sum()
                # poiss_comp[idx] = poisson_divider(masked_obs)
                poiss_comp[idx] = np.mean((masked_obs-masked_obs.mean())**2/masked_obs.mean())
                rms[idx] = np.sqrt(((masked_obs-masked_obs.mean())**2).mean())
                if parameter(idx) < parameter(good_idx):
                    good_idx = idx
                    good_lvl = lvl
            # Rebuild the mask from the best layer combination found above.
            try:
                # region = np.array([layer>0 for layer in wav_obs[2:-1][lvl]]).sum(0).astype(bool)
                region = wav_obs[2:-1][good_lvl].sum(0) > 0
            except ValueError:
                region = np.zeros(obs.data.shape, dtype=bool)
            masked_obs = np.ma.masked_array(obs.data, mask=region)
            rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
to_table = [obs.obs_id,
obs.det,
obs.ra,
obs.dec,
lon,
lat,
obs.time_start,
obs.exposure,
masked_obs.mean()/obs.exposure, #count rate
1 - rem_region.sum()/np.logical_not(obs.data.mask).sum(), #rem_area
poiss_comp[good_idx],
poiss_comp[0],
rms[good_idx]
]
        else:
            # Exposure too short to analyse: fill the statistics columns
            # (count_rate, remaining_area, poisson_chi2, poisson_chi2_full,
            # rms) with -1 sentinels, one per column.
            to_table = [obs.obs_id,
                        obs.det,
                        obs.ra,
                        obs.dec,
                        lon,
                        lat,
                        obs.time_start,
                        obs.exposure,
                        -1,
                        -1,
                        -1,
                        -1,
                        -1
                        ]
        return to_table, region.astype(int)
    except TypeError:
        # Malformed or unreadable event file: return the file name so the
        # caller can log the skip.
        return obs_name, np.zeros((360, 360))
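# Usage sketch (single observation, bypassing the pool); the event-file path
# below is a hypothetical example:
# row, region = process((0, r'E:\Archive\00\0\nu80002092008A01_cl.evt'))
# `row` is the 13-column table row, or the file name if processing failed.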
# %%
if __name__ == '__main__':
    start = time.perf_counter()
    processing = True
    start_new = True
    group_size = 50
    fits_folder = r'D:\Programms\Jupyter\Science\Source_mask\Archive\Processing_v8'
    region_folder = f'{fits_folder}\\Region'
    if not os.path.exists(fits_folder):
        os.makedirs(fits_folder)
        os.makedirs(region_folder)
    obs_list = get_link_list('E:\\Archive\\0[0-9]\\[0-9]', sort_list=True)[:]
    # NB: the list is sorted by file size (see get_link_list)
    print(f'Finished scanning folders. Found {len(obs_list)} observations.')
table = {
'obs_id':[], 'detector':[], 'ra':[], 'dec':[], 'lon':[], 'lat':[], 't_start':[], 'exposure':[],
'count_rate':[], 'remaining_area':[], 'poisson_chi2':[], 'poisson_chi2_full':[], 'rms':[]
}
    if start_new:
        out_table = pd.DataFrame(table)
        out_table.to_csv(f'{fits_folder}\\test.csv')
        # out_table.to_csv(f'{fits_folder}\\test_skipped.csv')
        os.system(rf'copy D:\Programms\Jupyter\Science\Source_mask\Archive\Processing_v3\test_skipped.csv {fits_folder}')
#REMOVING PROCESSED OBSERVATIONS
# already_processed = fits.getdata(f'{fits_folder}\\test.fits')['obs_name']
already_processed_list = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
already_skipped_list = pd.read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str})
already_processed = (already_processed_list['obs_id'].astype(str)+already_processed_list['detector']).values
already_skipped = (already_skipped_list['obs_id'].astype(str)+already_skipped_list['detector']).values
    # 'nu<obs_id><det>01_cl.evt' -> '<obs_id><det>', matching obs_id+detector above
    obs_list_names = [curr[curr.index('nu')+2:curr.index('_cl.evt')-2] for curr in obs_list]
    not_processed = np.array([(curr not in already_processed) for curr in obs_list_names])
    not_skipped = np.array([(curr not in already_skipped) for curr in obs_list_names])
    obs_list = obs_list[np.logical_and(not_processed, not_skipped)]
print(f'Removed already processed observations. {len(obs_list)} observations remain.')
if processing:
print('Started processing...')
num = 0
        for group_idx in range(len(obs_list)//group_size+1):
            print(f'Started group {group_idx}')
            group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1), len(obs_list))]
            if len(group_list) == 0:
                continue
            # Shrink the pool for groups with large event files to bound peak memory.
            max_size = np.array([stat(file).st_size/2**20 for file in group_list]).max()
            process_num = 10 if max_size < 50 else (5 if max_size < 200 else (2 if max_size < 1000 else 1))
            print(f'Max file size in group is {max_size:.2f} Mb, creating {process_num} processes')
            with get_context('spawn').Pool(processes=process_num) as pool:
                for result, region in pool.imap(process, enumerate(group_list)):
                    # A string result means process() failed on this file.
                    if type(result) is np.str_:
                        obs_id = result[result.index('nu'):result.index('_cl.evt')]
                        print(f'{num:>3} is skipped. File {obs_id}')
                        num += 1
                        continue
for key,value in zip(table.keys(),result):
table[key] = [value]
if table['exposure'][0] < 1000:
print(f'{num:>3} {str(result[0])+result[1]} is skipped. Exposure < 1000')
pd.DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False)
num +=1
continue
pd.DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False)
fits.writeto(f'{region_folder}\\{str(result[0])+result[1]}_region.fits', region, overwrite= True)
print(f'{num:>3} {str(result[0])+result[1]} is written.')
num +=1
            print('Converting generated csv to fits file...')
            print(f'Elapsed time: {time.perf_counter()-start:.2f} s')
            print(f'Processed {num/len(obs_list)*100:.2f} percent')
csv_file = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
Table.from_pandas(csv_file).write(f'{fits_folder}\\test.fits',overwrite=True)
print(f'Finished writing: {time.perf_counter()-start}')
# %%

get_region_pack.py (new file, 269 lines)

@@ -0,0 +1,269 @@
# %%
import import_ipynb
import numpy as np
import pandas as pd
import itertools
from os import listdir, mkdir, stat
from scipy.signal import fftconvolve, convolve2d
import matplotlib.pyplot as plt
from matplotlib.colors import SymLogNorm as lognorm
from astropy.io import fits
from astropy.wcs import WCS
from glob import glob
# %%
def binary_array(num):
    # All 2**num on/off combinations of num items, as rows of booleans.
    variants = [[0, 1], ]*num
    out = np.zeros((2**num, num), bool)
    for idx, level in enumerate(itertools.product(*variants)):
        out[idx] = np.array(level, dtype=bool)
    return out
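# Example (a sketch):
#   >>> binary_array(2)
#   array([[False, False],
#          [False,  True],
#          [ True, False],
#          [ True,  True]])
# Used by the archive script to enumerate every on/off combination of
# wavelet layers.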
def create_array(filename, mode='Sky'):
    # Bin an event list into a counts image, either in sky (X/Y) or
    # detector (DET1X/DET1Y) coordinates.
    temp = fits.getdata(filename, 1)
    if mode == 'Sky':
        return np.histogram2d(temp['Y'], temp['X'], 1000, [[0, 1000], [0, 1000]])[0]
    if mode == 'Det':
        return np.histogram2d(temp['DET1Y'], temp['DET1X'], 360, [[0, 360], [0, 360]])[0]
def get_wcs(file):
    # Build a WCS from the event-table keywords of columns 38/39
    # (the sky X/Y coordinate columns in NuSTAR cleaned event files).
    header = file[1].header
wcs = WCS({
'CTYPE1': header['TCTYP38'], 'CTYPE2': header['TCTYP39'],
'CUNIT1': header['TCUNI38'], 'CUNIT2': header['TCUNI39'],
'CDELT1': header['TCDLT38'], 'CDELT2': header['TCDLT39'],
'CRPIX1': header['TCRPX38'], 'CRPIX2': header['TCRPX39'],
'CRVAL1': header['TCRVL38'], 'CRVAL2': header['TCRVL39'],
'NAXIS1': header['TLMAX38'], 'NAXIS2': header['TLMAX39']
})
return wcs
def get_link_list(folder, sort_list=False):
    # Find all cleaned event files under folder; optionally sort by file size.
    links = glob(f'{folder}\\**\\*_cl.evt', recursive=True)
    if sort_list:
        links = sorted(links, key=lambda x: stat(x).st_size)
    return np.array(links)
def atrous(level=0, resize=False, max_size=1001):
    # Kernel of the a trous ("with holes") wavelet transform: the 5x5
    # B3-spline base upsampled by inserting 2**level - 1 zeros between taps.
    base = 1/256*np.array([
        [1, 4, 6, 4, 1],
        [4, 16, 24, 16, 4],
        [6, 24, 36, 24, 6],
        [4, 16, 24, 16, 4],
        [1, 4, 6, 4, 1],
    ])
    size = 2**level * (base.shape[0]-1) + 1
    output = np.zeros((size, size))
    output[::2**level, ::2**level] = base
    if resize:
        output = np.pad(output, pad_width=2**(level+1))
    if output.shape[0] > max_size:
        # Crop symmetrically around the kernel centre.
        centre = (output.shape[0]-1)//2
        half = (max_size-1)//2
        return output[centre-half:centre+half+1, centre-half:centre+half+1]
    return output
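# Example (a sketch): atrous(0) is the plain 5x5 B3-spline kernel, while
# atrous(1) is 9x9 with the same 25 coefficients placed on every other pixel,
# so each successive level smooths at twice the spatial scale.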
def gauss(level=0, resize=False, max_size=1000):
    # Normalised 2D Gaussian kernel with sigma = 2**level, built as the outer
    # product of two 1D Gaussians.
    size = min(5*2**level+1 if not resize else 5*2**(level+1)+1, max_size)
    sigma = 2**level
    if sigma < 1:
        # Degenerate case: return a delta-function kernel.
        out = np.zeros((size+1, size+1))
        out[int((size-1)/2)+1][int((size-1)/2)+1] = 1
        return out
    A = 1/(2*np.pi*sigma**2)**0.5
    x = A*np.exp(-(np.arange(size)-(size-1)//2)**2/(2*sigma**2))
    out = np.multiply.outer(x, x)
    return out
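# Example (a sketch): gauss(2) has sigma = 4 and spans 5*2**2 + 1 = 21 pixels,
# roughly matching the spatial scale of atrous(2).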
def sigma(mode='atrous', level=0):
    # Per-level noise scaling factors: the std of unit-variance white noise
    # propagated to each wavelet plane (tabulated for levels 0-5, halved per
    # level beyond that).
    if mode=='atrous':
sigma = [0.8908, 0.20066, 0.08551, 0.04122, 0.02042, 0.0114]
elif mode=='gauss':
sigma = [0.912579, 0.125101, 0.104892, 4.97810e-02, 2.46556e-02, 1.14364e-02]
if level < 6:
return sigma[level]
else:
return sigma[5]/2**(level-5)
def adjecent(array):
    # Coordinates of pixels bordering the True region of array
    # (8-connectivity), excluding the region itself and any masked pixels.
    grid = np.array([
        [1, 1, 1],
        [1, 0, 1],
        [1, 1, 1]
    ])
    output = fftconvolve(array, grid, mode='same') >= 0.5
    try:
        output = np.logical_and(np.logical_and(output, np.logical_not(array)), np.logical_not(array.mask))
    except AttributeError:
        # Plain (unmasked) arrays have no .mask attribute.
        output = np.logical_and(output, np.logical_not(array))
    output = np.argwhere(output)
    return output[:, 0], output[:, 1]
def add_borders(array, middle=True):
    # Mask everything outside the bounding box of the illuminated pixels;
    # with middle=True also mask the chip-gap rows/columns at the centre.
    # array_blurred = convolve2d(array,np.ones((5,5)),mode='same')
    mask = np.zeros(array.shape)
    datax, datay = np.any(array > 0, 0), np.any(array > 0, 1)
    # datax, datay = np.any(array_blurred>0,0), np.any(array_blurred>0,1)
    # Add border masks
x_min, y_min = np.argmax(datax), np.argmax(datay)
x_max, y_max = len(datax) - np.argmax(datax[::-1]), len(datay) - np.argmax(datay[::-1])
# x_mid_min, y_mid_min = x_min+10+np.argmin(datax[x_min+10:]), y_min+10+np.argmin(datay[y_min+10:])
# x_mid_max, y_mid_max = x_max-10-np.argmin(datax[x_max-11::-1]), y_max-10-np.argmin(datay[y_max-11::-1])
mask[y_min:y_max,x_min:x_max] = True
if middle is True:
mask[176:191,:] = False
mask[:,176:191] = False
# mask[y_mid_min:y_mid_max,:] = False
# mask[:,x_mid_min:x_mid_max] = False
mask = np.logical_not(mask)
return mask
def fill_poisson(array, size_input=32):
    # Fill masked pixels with Poisson realisations of the local mean, which is
    # estimated with a box kernel that doubles in size each pass until
    # (almost) every masked pixel has enough unmasked neighbours.
    if not isinstance(array, np.ma.MaskedArray):
        print('No mask found')
        return array
    size = size_input
    output = array.data.copy()
    mask = array.mask.copy()
    while mask.sum() > 1:
        kernel = np.ones((size, size))/size**2
        coeff = fftconvolve(np.logical_not(mask), kernel, mode='same')
        mean = fftconvolve(output, kernel, mode='same')
        idx = np.where(np.logical_and(mask, coeff > 0.1))
        output[idx] = np.random.poisson(np.abs(mean[idx]/coeff[idx]))
        mask[idx] = False
        size *= 2
    return output
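# Example (a sketch, with hypothetical `counts` and `holes` arrays):
#   filled = fill_poisson(np.ma.masked_array(counts, mask=holes))
# Each hole is replaced by a Poisson draw around the local unmasked average,
# so the filled image keeps Poisson statistics for the wavelet analysis below.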
def fill_mean(array, size_input=3):
    # Fill masked pixels with the mean of a local window, growing the window
    # until no NaN pixels remain.
    size = size_input
    if not isinstance(array, np.ma.MaskedArray):
        print('No mask found')
        return array
output = array.filled(0)
for i,j in zip(*np.where(array.mask)):
output[i,j] = array[max(0,i-size):min(array.shape[0]-1,i+size+1),max(0,j-size):min(array.shape[1]-1,j+size+1)].mean()
while np.isnan(output).any():
size += 5
for i,j in zip(*np.where(np.isnan(output))):
output[i,j] = array[max(0,i-size):min(array.shape[0]-1,i+size+1),max(0,j-size):min(array.shape[1]-1,j+size+1)].mean()
return output
def mirror(array):
    # Reflective padding: tile the image 3x3 and flip the outer tiles so the
    # original sits in the centre with mirrored borders on all sides.
    size = array.shape[0]
output = np.tile(array,(3,3))
output[0:size] = np.flipud(output[0:size])
output[2*size:3*size] = np.flipud(output[2*size:3*size])
output[:,0:size] = np.fliplr(output[:,0:size])
output[:,2*size:3*size] = np.fliplr(output[:,2*size:3*size])
return output
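# Example (a sketch):
#   mirror(np.array([[1, 2], [3, 4]]))
# returns a 6x6 array whose central 2x2 block is the original and whose border
# blocks are mirror reflections, so fftconvolve sees no hard edge.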
class Observation:
def __init__(self, file_name, E_borders=[3,20]):
self.filename = file_name
self.name = file_name[file_name.find('nu'):].replace('_cl.evt','')
with fits.open(file_name) as file:
self.obs_id = file[0].header['OBS_ID']
self.ra = file[0].header['RA_NOM']
self.dec = file[0].header['DEC_NOM']
self.time_start = file[0].header['TSTART']
self.header = file[0].header
self.det = self.header['INSTRUME'][-1]
self.wcs = get_wcs(file)
        self.data = np.ma.masked_array(*self.get_data(file, E_borders))
        self.hard_mask = add_borders(self.data.data, middle=False)
        shift = int((360-64)/2)
        self.model = (fits.getdata(rf'D:\Programms\Jupyter\Science\Source_mask\Model/det1_fpm{self.det}.cxb.fits', 0)
                      *(180/np.pi/10150)**2)[shift:360+shift, shift:360+shift]
        self.exposure = self.header['EXPOSURE']
# def get_coeff(self,folder='D:\\Programms\\Jupyter\\Science\\Source_mask\\Archive\\OCC_observations'):
# try:
# # _occ_list = get_link_list('D:\\Programms\\Jupyter\\Science\\Source_mask\\Archive\\OCC_observations')
# _occ_list = get_link_list(folder)
# occ_list = dict()
# for name in _occ_list:
# occ_list[name[name.index('nu'):name.index('02_cl.evt')]] = name
# occ = Observation(occ_list[self.name[:-2]],E_borders=[10,20])
# output = np.zeros((360,360))
# for i in range(2):
# for j in range(2):
# _temp = occ.data[180*i:180*(i+1),180*j:180*(j+1)].mean()
# output[180*i:180*(i+1),180*j:180*(j+1)] = _temp if _temp != 0 else 1
# except KeyError:
# output = np.ones((360,360))
# out = output.min()/output
# if np.any(out<0.8):
# return np.ones((360,360))
# else:
# return out
    def get_coeff(self):
        # Per-quadrant flat-field coefficients (empirical, per detector
        # module), expanded to the full 360x360 detector grid.
        # coeff = np.array([0.972,0.895,1.16,1.02]) if self.det=='A' else np.array([0.991,1.047,1.012,0.956])
        coeff = np.array([0.977, 0.861, 1.163, 1.05]) if self.det == 'A' else np.array([1.004, 0.997, 1.025, 0.979])
        return coeff.reshape(2, 2).repeat(180, 0).repeat(180, 1)
    def get_data(self, file, E_borders=[3, 20]):
        # Select events with clean STATUS inside the energy band and bin them
        # into a 360x360 DET1 image; rejected events feed the mask.
        PI_min, PI_max = (np.array(E_borders)-1.6)/0.04
        data = file[1].data.copy()
        idx_mask = (data['STATUS'].sum(1) == 0)
        idx_output = np.logical_and(idx_mask, np.logical_and((data['PI'] > PI_min), (data['PI'] < PI_max)))
data_output = data[idx_output]
data_mask = data[np.logical_not(idx_mask)]
build_hist = lambda array: np.histogram2d(array['DET1Y'],array['DET1X'],360,[[0,360],[0,360]])[0]
output = build_hist(data_output)
mask = build_hist(data_mask)
mask = np.logical_or(mask,add_borders(output))
mask = np.logical_or(mask, self.get_bad_pix(file))
return output, mask
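    # Note: NuSTAR PI channels map to energy as E[keV] = 0.04*PI + 1.6, so the
    # default E_borders=[3, 20] keV selects events with 35 < PI < 460.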
    def get_bad_pix(self, file):
        # Project the BADPIX extensions (HDUs 3-6, one per detector chip) onto
        # DET1 coordinates via the pixpos calibration files, then dilate 5x5.
        output = np.zeros((360, 360))
        kernel = np.ones((5, 5))
for i in range(4):
pixpos_file = fits.getdata(f'D:\\Programms\\Jupyter\\Science\\Source_mask\\Pixpos\\nu{self.det}pixpos20100101v007.fits',i+1)
bad_pix_file = file[3+i].data.copy()
temp = np.zeros(len(pixpos_file),dtype=bool)
for x,y in zip(bad_pix_file['rawx'],bad_pix_file['rawy']):
temp = np.logical_or(temp, np.equal(pixpos_file['rawx'],x)*np.equal(pixpos_file['rawy'],y))
temp = pixpos_file[temp]
output += np.histogram2d(temp['REF_DET1Y'],temp['REF_DET1X'], 360, [[0,360],[0,360]])[0]
output = convolve2d(output, kernel, mode='same') >0
return output
    def wavdecomp(self, mode='atrous', thresh=False, iteration=1, occ_coeff=False):
        # THRESHOLD: a single number gives (thresh, thresh/2); a tuple gives
        # (detection, extension) thresholds; anything else disables thresholding.
        if type(thresh) is int:
            thresh_max, thresh_add = thresh, thresh/2
        elif type(thresh) is tuple:
            thresh_max, thresh_add = thresh
        else:
            thresh_max, thresh_add = 0, 0
        # INIT BASIC VARIABLES
        wavelet = globals()[mode]
        # max_level = int(np.ceil(np.log2(self.data.shape[0])))+1
        max_level = 8
        conv_out = np.zeros((max_level+1, self.data.shape[0], self.data.shape[1]))
        size = self.data.shape[0]
        # PREPARE ORIGINAL DATA FOR ANALYSIS: FILL THE HOLES + MIRROR + DETECTOR CORRECTION
        data = fill_poisson(self.data)
        if occ_coeff:
            data = data*self.get_coeff()
        data = mirror(data)
        data_bkg = data.copy()
        # ITERATIVELY CONDUCT WAVELET DECOMPOSITION
        for i in range(max_level):
            for num_iter in range(iteration):
                conv = fftconvolve(data, wavelet(i), mode='same')
                temp_out = data - conv
                # ERROR MAP CALCULATION
                if thresh_max != 0:
                    sig = sigma(mode, i)
                    bkg = fftconvolve(data_bkg, wavelet(i), mode='same')
                    bkg[bkg < 0] = 0
                    # err = (1+np.sqrt(bkg/sig**2 + 0.75))*sig**3
                    err = (1+np.sqrt(bkg+0.75))*sig
                    significant = (np.abs(temp_out) > thresh_max*err)[size:2*size, size:2*size]
                    # significant = (temp_out > thresh_max*err)[size:2*size,size:2*size]
                    if thresh_add != 0:
                        # Grow the detections: keep adding neighbouring pixels
                        # that pass the lower threshold until nothing changes.
                        add_significant = (np.abs(temp_out) > thresh_add*err)[size:2*size, size:2*size]
                        # add_significant = (temp_out > thresh_add*err)[size:2*size,size:2*size]
                        adj = adjecent(significant)
                        add_condition = np.logical_and(add_significant[adj[0], adj[1]], np.logical_not(significant[adj[0], adj[1]]))
                        while (add_condition).any():
                            to_add = adj[0][add_condition], adj[1][add_condition]
                            significant[to_add[0], to_add[1]] = True
                            adj = adjecent(significant)
                            add_condition = np.logical_and(add_significant[adj[0], adj[1]], np.logical_not(significant[adj[0], adj[1]]))
                            # add_condition = np.logical_and(np.abs(temp_out)[adj[0],adj[1]] >= thresh_add*err[adj[0],adj[1]], np.logical_not(significant)[adj[0],adj[1]])
                    temp_out[size:2*size, size:2*size][np.logical_not(significant)] = 0
                # WRITING THE WAVELET DECOMP LAYER
                if temp_out[size:2*size, size:2*size].sum() == 0:
                    break
                conv_out[i] = temp_out[size:2*size, size:2*size]
                conv_out[i][conv_out[i] < 0] = 0  # keep only positive flux so layers can be summed safely
                # conv_out[i] = significant
                data = conv
        conv_out[max_level] = conv[size:2*size, size:2*size]
        return conv_out
# %%
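# %%
# Usage sketch (the event-file path is a hypothetical example, not part of
# this module): build an observation, decompose it, and combine the detail
# layers into a source mask the same way get_region_archive_table.process does.
# obs = Observation(r'E:\Archive\00\0\nu80002092008A01_cl.evt')
# layers = obs.wavdecomp('gauss', thresh=(5, 3), occ_coeff=True)
# src_mask = layers[2:-1].sum(0) > 0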