commit 2a759d92ef
Author: Andrey Mukhin
Date:   2022-08-30 15:10:28 +03:00
2 changed files with 445 additions and 0 deletions

get_region_archive_table.py (new file, 176 lines)

@@ -0,0 +1,176 @@
# %%
from get_region_pack import *
import numpy as np
import pandas as pd
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from multiprocessing import get_context
import warnings
import time
import os
warnings.filterwarnings('ignore')
# %%
def ellipse(array):
    # Centroid of the nonzero pixels and the full extent of the region along
    # x and y, interpreted as the axes of a bounding ellipse.
    grid = np.indices(array.shape)[::-1]
    center = [((curr_axis*array).sum()/array.sum()) for curr_axis in grid]
    y, x = np.where(array)
    return center, np.abs(x-center[0]).max()*2, np.abs(y-center[1]).max()*2
def mask_ellipse(array):
    # Boolean mask of the ellipse bounding the nonzero pixels of array.
    (center_x, center_y), len_x, len_y = ellipse(array)
    x, y = np.indices(array.shape)[::-1]
    radius = ((x-center_x)/(0.5*len_x))**2 + ((y-center_y)/(0.5*len_y))**2
    return radius <= 1
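# Example (a sketch): for a 5x5 array with ones at (row, col) = (1,1), (1,3),
# (3,1) and (3,3), ellipse() returns centre (2.0, 2.0) with both extents equal
# to 2, and mask_ellipse() marks the five pixels satisfying
# ((x-2)/1)**2 + ((y-2)/1)**2 <= 1.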
def poisson_divider(array):
    # Chi-squared-like statistic of a 360x360 masked image, computed per
    # 180x180 detector quadrant: sum((x - mean)**2 / mean) over unmasked
    # pixels, normalised by the total number of unmasked pixels.
    sub_arrays = np.zeros((4, 180, 180))
    for i in range(2):
        for j in range(2):
            sub_arrays[i+2*j] = array[180*i:180*(i+1), 180*j:180*(j+1)].filled(-1000)
    pix_sum = 0
    for layer in sub_arrays:
        _masked_array = np.ma.masked_less(layer, 0)
        pix_sum += ((_masked_array-_masked_array.mean())**2/_masked_array.mean()).sum()
    pix_sum /= np.logical_not(array.mask).sum()
    return pix_sum
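# For pure Poisson data the variance equals the mean, so sum((x-mean)**2/mean)
# over N unmasked pixels is ~N; after the normalisation above, a value near 1
# means the residual image is Poisson-like, while values well above 1 flag
# leftover source structure.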
def process(argument):
    # Worker: build a source mask for a single observation and return the
    # statistics row for the output table together with the mask itself.
    obs_idx, obs_name = argument
    bin_num = 6
    try:
        obs = Observation(obs_name)
        sky_coord = SkyCoord(ra=obs.ra*u.deg, dec=obs.dec*u.deg, frame='fk5').transform_to('galactic')
        lon, lat = sky_coord.l.value, sky_coord.b.value
        rem_signal, rem_area, poiss_comp, rms = np.zeros((4, 2**bin_num))
        region = np.zeros(obs.data.shape, dtype=bool)
        rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
        masked_obs = np.ma.masked_array(obs.data, mask=region)
        good_lvl = np.zeros(bin_num, dtype=bool)
        good_idx = 0
        # Mask quality: distance of the unmasked pixels from Poisson statistics,
        # penalised by the fraction of the field the mask removes.
        parameter = lambda idx: (poiss_comp[idx]**2 + ((1-rem_area[idx])*0.5)**2)
        if obs.exposure > 1000:
            wav_obs = obs.wavdecomp('gauss', (5, 3), occ_coeff=True)
            occ_coeff = obs.get_coeff()
            # Try every on/off combination of the wavelet layers and keep the
            # one that minimises the quality metric.
            for idx, lvl in enumerate(binary_array(bin_num)):
                try:
                    # region = np.array([layer>0 for layer in wav_obs[2:-1][lvl]]).sum(0).astype(bool)
                    region = wav_obs[2:-1][lvl].sum(0) > 0
                except ValueError:
                    region = np.zeros(obs.data.shape, dtype=bool)
                masked_obs = np.ma.masked_array(obs.data, mask=region)*occ_coeff
                rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
                rem_signal[idx] = 1 - obs.data[region].sum()/obs.data.sum()
                rem_area[idx] = 1 - rem_region.sum()/np.logical_not(obs.data.mask).sum()
                # poiss_comp[idx] = poisson_divider(masked_obs)
                poiss_comp[idx] = np.mean((masked_obs-masked_obs.mean())**2/masked_obs.mean())
                rms[idx] = np.sqrt(((masked_obs-masked_obs.mean())**2).mean())
                if parameter(idx) < parameter(good_idx):
                    good_idx = idx
                    good_lvl = lvl
            # Rebuild the mask from the best layer combination found above.
            try:
                # region = np.array([layer>0 for layer in wav_obs[2:-1][lvl]]).sum(0).astype(bool)
                region = wav_obs[2:-1][good_lvl].sum(0) > 0
            except ValueError:
                region = np.zeros(obs.data.shape, dtype=bool)
            masked_obs = np.ma.masked_array(obs.data, mask=region)
            rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
to_table = [obs.obs_id,
obs.det,
obs.ra,
obs.dec,
lon,
lat,
obs.time_start,
obs.exposure,
masked_obs.mean()/obs.exposure, #count rate
1 - rem_region.sum()/np.logical_not(obs.data.mask).sum(), #rem_area
poiss_comp[good_idx],
poiss_comp[0],
rms[good_idx]
]
        else:
            # Exposure too short to analyse: fill the statistics columns
            # (count_rate, remaining_area, poisson_chi2, poisson_chi2_full,
            # rms) with -1 sentinels, one per column.
            to_table = [obs.obs_id,
                        obs.det,
                        obs.ra,
                        obs.dec,
                        lon,
                        lat,
                        obs.time_start,
                        obs.exposure,
                        -1,
                        -1,
                        -1,
                        -1,
                        -1
                        ]
        return to_table, region.astype(int)
    except TypeError:
        # Malformed or unreadable event file: return the file name so the
        # caller can log the skip.
        return obs_name, np.zeros((360, 360))
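# Usage sketch (single observation, bypassing the pool); the event-file path
# below is a hypothetical example:
# row, region = process((0, r'E:\Archive\00\0\nu80002092008A01_cl.evt'))
# `row` is the 13-column table row, or the file name if processing failed.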
# %%
if __name__ == '__main__':
    start = time.perf_counter()
    processing = True
    start_new = True
    group_size = 50
    fits_folder = r'D:\Programms\Jupyter\Science\Source_mask\Archive\Processing_v8'
    region_folder = f'{fits_folder}\\Region'
    if not os.path.exists(fits_folder):
        os.makedirs(fits_folder)
        os.makedirs(region_folder)
    obs_list = get_link_list('E:\\Archive\\0[0-9]\\[0-9]', sort_list=True)[:]
    # NB: the list is sorted by file size (see get_link_list)
    print(f'Finished scanning folders. Found {len(obs_list)} observations.')
table = {
'obs_id':[], 'detector':[], 'ra':[], 'dec':[], 'lon':[], 'lat':[], 't_start':[], 'exposure':[],
'count_rate':[], 'remaining_area':[], 'poisson_chi2':[], 'poisson_chi2_full':[], 'rms':[]
}
    if start_new:
        out_table = pd.DataFrame(table)
        out_table.to_csv(f'{fits_folder}\\test.csv')
        # out_table.to_csv(f'{fits_folder}\\test_skipped.csv')
        os.system(rf'copy D:\Programms\Jupyter\Science\Source_mask\Archive\Processing_v3\test_skipped.csv {fits_folder}')
#REMOVING PROCESSED OBSERVATIONS
# already_processed = fits.getdata(f'{fits_folder}\\test.fits')['obs_name']
already_processed_list = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
already_skipped_list = pd.read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str})
already_processed = (already_processed_list['obs_id'].astype(str)+already_processed_list['detector']).values
already_skipped = (already_skipped_list['obs_id'].astype(str)+already_skipped_list['detector']).values
    # 'nu<obs_id><det>01_cl.evt' -> '<obs_id><det>', matching obs_id+detector above
    obs_list_names = [curr[curr.index('nu')+2:curr.index('_cl.evt')-2] for curr in obs_list]
    not_processed = np.array([(curr not in already_processed) for curr in obs_list_names])
    not_skipped = np.array([(curr not in already_skipped) for curr in obs_list_names])
    obs_list = obs_list[np.logical_and(not_processed, not_skipped)]
print(f'Removed already processed observations. {len(obs_list)} observations remain.')
if processing:
print('Started processing...')
num = 0
        for group_idx in range(len(obs_list)//group_size+1):
            print(f'Started group {group_idx}')
            group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1), len(obs_list))]
            if len(group_list) == 0:
                continue
            # Shrink the pool for groups with large event files to bound peak memory.
            max_size = np.array([stat(file).st_size/2**20 for file in group_list]).max()
            process_num = 10 if max_size < 50 else (5 if max_size < 200 else (2 if max_size < 1000 else 1))
            print(f'Max file size in group is {max_size:.2f} Mb, creating {process_num} processes')
            with get_context('spawn').Pool(processes=process_num) as pool:
                for result, region in pool.imap(process, enumerate(group_list)):
                    # A string result means process() failed on this file.
                    if type(result) is np.str_:
                        obs_id = result[result.index('nu'):result.index('_cl.evt')]
                        print(f'{num:>3} is skipped. File {obs_id}')
                        num += 1
                        continue
for key,value in zip(table.keys(),result):
table[key] = [value]
if table['exposure'][0] < 1000:
print(f'{num:>3} {str(result[0])+result[1]} is skipped. Exposure < 1000')
pd.DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False)
num +=1
continue
pd.DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False)
fits.writeto(f'{region_folder}\\{str(result[0])+result[1]}_region.fits', region, overwrite= True)
print(f'{num:>3} {str(result[0])+result[1]} is written.')
num +=1
            print('Converting generated csv to fits file...')
            print(f'Elapsed time: {time.perf_counter()-start:.2f} s')
            print(f'Processed {num/len(obs_list)*100:.2f} percent')
csv_file = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
Table.from_pandas(csv_file).write(f'{fits_folder}\\test.fits',overwrite=True)
print(f'Finished writing: {time.perf_counter()-start}')
# %%

get_region_pack.py (new file, 269 lines)

@@ -0,0 +1,269 @@
# %%
import import_ipynb
import numpy as np
import pandas as pd
import itertools
from os import listdir, mkdir, stat
from scipy.signal import fftconvolve, convolve2d
import matplotlib.pyplot as plt
from matplotlib.colors import SymLogNorm as lognorm
from astropy.io import fits
from astropy.wcs import WCS
from glob import glob
# %%
def binary_array(num):
    # All 2**num on/off combinations of num items, as rows of booleans.
    variants = [[0, 1], ]*num
    out = np.zeros((2**num, num), bool)
    for idx, level in enumerate(itertools.product(*variants)):
        out[idx] = np.array(level, dtype=bool)
    return out
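# Example (a sketch):
#   >>> binary_array(2)
#   array([[False, False],
#          [False,  True],
#          [ True, False],
#          [ True,  True]])
# Used by the archive script to enumerate every on/off combination of
# wavelet layers.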
def create_array(filename, mode='Sky'):
    # Bin an event list into a counts image, either in sky (X/Y) or
    # detector (DET1X/DET1Y) coordinates.
    temp = fits.getdata(filename, 1)
    if mode == 'Sky':
        return np.histogram2d(temp['Y'], temp['X'], 1000, [[0, 1000], [0, 1000]])[0]
    if mode == 'Det':
        return np.histogram2d(temp['DET1Y'], temp['DET1X'], 360, [[0, 360], [0, 360]])[0]
def get_wcs(file):
    # Build a WCS from the event-table keywords of columns 38/39
    # (the sky X/Y coordinate columns in NuSTAR cleaned event files).
    header = file[1].header
wcs = WCS({
'CTYPE1': header['TCTYP38'], 'CTYPE2': header['TCTYP39'],
'CUNIT1': header['TCUNI38'], 'CUNIT2': header['TCUNI39'],
'CDELT1': header['TCDLT38'], 'CDELT2': header['TCDLT39'],
'CRPIX1': header['TCRPX38'], 'CRPIX2': header['TCRPX39'],
'CRVAL1': header['TCRVL38'], 'CRVAL2': header['TCRVL39'],
'NAXIS1': header['TLMAX38'], 'NAXIS2': header['TLMAX39']
})
return wcs
def get_link_list(folder, sort_list=False):
    # Find all cleaned event files under folder; optionally sort by file size.
    links = glob(f'{folder}\\**\\*_cl.evt', recursive=True)
    if sort_list:
        links = sorted(links, key=lambda x: stat(x).st_size)
    return np.array(links)
def atrous(level=0, resize=False, max_size=1001):
    # Kernel of the a trous ("with holes") wavelet transform: the 5x5
    # B3-spline base upsampled by inserting 2**level - 1 zeros between taps.
    base = 1/256*np.array([
        [1, 4, 6, 4, 1],
        [4, 16, 24, 16, 4],
        [6, 24, 36, 24, 6],
        [4, 16, 24, 16, 4],
        [1, 4, 6, 4, 1],
    ])
    size = 2**level * (base.shape[0]-1) + 1
    output = np.zeros((size, size))
    output[::2**level, ::2**level] = base
    if resize:
        output = np.pad(output, pad_width=2**(level+1))
    if output.shape[0] > max_size:
        # Crop symmetrically around the kernel centre.
        centre = (output.shape[0]-1)//2
        half = (max_size-1)//2
        return output[centre-half:centre+half+1, centre-half:centre+half+1]
    return output
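# Example (a sketch): atrous(0) is the plain 5x5 B3-spline kernel, while
# atrous(1) is 9x9 with the same 25 coefficients placed on every other pixel,
# so each successive level smooths at twice the spatial scale.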
def gauss(level=0, resize=False, max_size=1000):
    # Normalised 2D Gaussian kernel with sigma = 2**level, built as the outer
    # product of two 1D Gaussians.
    size = min(5*2**level+1 if not resize else 5*2**(level+1)+1, max_size)
    sigma = 2**level
    if sigma < 1:
        # Degenerate case: return a delta-function kernel.
        out = np.zeros((size+1, size+1))
        out[int((size-1)/2)+1][int((size-1)/2)+1] = 1
        return out
    A = 1/(2*np.pi*sigma**2)**0.5
    x = A*np.exp(-(np.arange(size)-(size-1)//2)**2/(2*sigma**2))
    out = np.multiply.outer(x, x)
    return out
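# Example (a sketch): gauss(2) has sigma = 4 and spans 5*2**2 + 1 = 21 pixels,
# roughly matching the spatial scale of atrous(2).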
def sigma(mode='atrous', level=0):
    # Per-level noise scaling factors: the std of unit-variance white noise
    # propagated to each wavelet plane (tabulated for levels 0-5, halved per
    # level beyond that).
    if mode=='atrous':
sigma = [0.8908, 0.20066, 0.08551, 0.04122, 0.02042, 0.0114]
elif mode=='gauss':
sigma = [0.912579, 0.125101, 0.104892, 4.97810e-02, 2.46556e-02, 1.14364e-02]
if level < 6:
return sigma[level]
else:
return sigma[5]/2**(level-5)
def adjecent(array):
    # Coordinates of pixels bordering the True region of array
    # (8-connectivity), excluding the region itself and any masked pixels.
    grid = np.array([
        [1, 1, 1],
        [1, 0, 1],
        [1, 1, 1]
    ])
    output = fftconvolve(array, grid, mode='same') >= 0.5
    try:
        output = np.logical_and(np.logical_and(output, np.logical_not(array)), np.logical_not(array.mask))
    except AttributeError:
        # Plain (unmasked) arrays have no .mask attribute.
        output = np.logical_and(output, np.logical_not(array))
    output = np.argwhere(output)
    return output[:, 0], output[:, 1]
def add_borders(array, middle=True):
    # Mask everything outside the bounding box of the illuminated pixels;
    # with middle=True also mask the chip-gap rows/columns at the centre.
    # array_blurred = convolve2d(array,np.ones((5,5)),mode='same')
    mask = np.zeros(array.shape)
    datax, datay = np.any(array > 0, 0), np.any(array > 0, 1)
    # datax, datay = np.any(array_blurred>0,0), np.any(array_blurred>0,1)
    # Add border masks
x_min, y_min = np.argmax(datax), np.argmax(datay)
x_max, y_max = len(datax) - np.argmax(datax[::-1]), len(datay) - np.argmax(datay[::-1])
# x_mid_min, y_mid_min = x_min+10+np.argmin(datax[x_min+10:]), y_min+10+np.argmin(datay[y_min+10:])
# x_mid_max, y_mid_max = x_max-10-np.argmin(datax[x_max-11::-1]), y_max-10-np.argmin(datay[y_max-11::-1])
mask[y_min:y_max,x_min:x_max] = True
if middle is True:
mask[176:191,:] = False
mask[:,176:191] = False
# mask[y_mid_min:y_mid_max,:] = False
# mask[:,x_mid_min:x_mid_max] = False
mask = np.logical_not(mask)
return mask
def fill_poisson(array, size_input=32):
    # Fill masked pixels with Poisson realisations of the local mean, which is
    # estimated with a box kernel that doubles in size each pass until
    # (almost) every masked pixel has enough unmasked neighbours.
    if not isinstance(array, np.ma.MaskedArray):
        print('No mask found')
        return array
    size = size_input
    output = array.data.copy()
    mask = array.mask.copy()
    while mask.sum() > 1:
        kernel = np.ones((size, size))/size**2
        coeff = fftconvolve(np.logical_not(mask), kernel, mode='same')
        mean = fftconvolve(output, kernel, mode='same')
        idx = np.where(np.logical_and(mask, coeff > 0.1))
        output[idx] = np.random.poisson(np.abs(mean[idx]/coeff[idx]))
        mask[idx] = False
        size *= 2
    return output
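# Example (a sketch, with hypothetical `counts` and `holes` arrays):
#   filled = fill_poisson(np.ma.masked_array(counts, mask=holes))
# Each hole is replaced by a Poisson draw around the local unmasked average,
# so the filled image keeps Poisson statistics for the wavelet analysis below.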
def fill_mean(array, size_input=3):
    # Fill masked pixels with the mean of a local window, growing the window
    # until no NaN pixels remain.
    size = size_input
    if not isinstance(array, np.ma.MaskedArray):
        print('No mask found')
        return array
output = array.filled(0)
for i,j in zip(*np.where(array.mask)):
output[i,j] = array[max(0,i-size):min(array.shape[0]-1,i+size+1),max(0,j-size):min(array.shape[1]-1,j+size+1)].mean()
while np.isnan(output).any():
size += 5
for i,j in zip(*np.where(np.isnan(output))):
output[i,j] = array[max(0,i-size):min(array.shape[0]-1,i+size+1),max(0,j-size):min(array.shape[1]-1,j+size+1)].mean()
return output
def mirror(array):
    # Reflective padding: tile the image 3x3 and flip the outer tiles so the
    # original sits in the centre with mirrored borders on all sides.
    size = array.shape[0]
output = np.tile(array,(3,3))
output[0:size] = np.flipud(output[0:size])
output[2*size:3*size] = np.flipud(output[2*size:3*size])
output[:,0:size] = np.fliplr(output[:,0:size])
output[:,2*size:3*size] = np.fliplr(output[:,2*size:3*size])
return output
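# Example (a sketch):
#   mirror(np.array([[1, 2], [3, 4]]))
# returns a 6x6 array whose central 2x2 block is the original and whose border
# blocks are mirror reflections, so fftconvolve sees no hard edge.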
class Observation:
def __init__(self, file_name, E_borders=[3,20]):
self.filename = file_name
self.name = file_name[file_name.find('nu'):].replace('_cl.evt','')
with fits.open(file_name) as file:
self.obs_id = file[0].header['OBS_ID']
self.ra = file[0].header['RA_NOM']
self.dec = file[0].header['DEC_NOM']
self.time_start = file[0].header['TSTART']
self.header = file[0].header
self.det = self.header['INSTRUME'][-1]
self.wcs = get_wcs(file)
        self.data = np.ma.masked_array(*self.get_data(file, E_borders))
        self.hard_mask = add_borders(self.data.data, middle=False)
        shift = int((360-64)/2)
        self.model = (fits.getdata(rf'D:\Programms\Jupyter\Science\Source_mask\Model/det1_fpm{self.det}.cxb.fits', 0)
                      *(180/np.pi/10150)**2)[shift:360+shift, shift:360+shift]
        self.exposure = self.header['EXPOSURE']
# def get_coeff(self,folder='D:\\Programms\\Jupyter\\Science\\Source_mask\\Archive\\OCC_observations'):
# try:
# # _occ_list = get_link_list('D:\\Programms\\Jupyter\\Science\\Source_mask\\Archive\\OCC_observations')
# _occ_list = get_link_list(folder)
# occ_list = dict()
# for name in _occ_list:
# occ_list[name[name.index('nu'):name.index('02_cl.evt')]] = name
# occ = Observation(occ_list[self.name[:-2]],E_borders=[10,20])
# output = np.zeros((360,360))
# for i in range(2):
# for j in range(2):
# _temp = occ.data[180*i:180*(i+1),180*j:180*(j+1)].mean()
# output[180*i:180*(i+1),180*j:180*(j+1)] = _temp if _temp != 0 else 1
# except KeyError:
# output = np.ones((360,360))
# out = output.min()/output
# if np.any(out<0.8):
# return np.ones((360,360))
# else:
# return out
    def get_coeff(self):
        # Per-quadrant flat-field coefficients (empirical, per detector
        # module), expanded to the full 360x360 detector grid.
        # coeff = np.array([0.972,0.895,1.16,1.02]) if self.det=='A' else np.array([0.991,1.047,1.012,0.956])
        coeff = np.array([0.977, 0.861, 1.163, 1.05]) if self.det == 'A' else np.array([1.004, 0.997, 1.025, 0.979])
        return coeff.reshape(2, 2).repeat(180, 0).repeat(180, 1)
    def get_data(self, file, E_borders=[3, 20]):
        # Select events with clean STATUS inside the energy band and bin them
        # into a 360x360 DET1 image; rejected events feed the mask.
        PI_min, PI_max = (np.array(E_borders)-1.6)/0.04
        data = file[1].data.copy()
        idx_mask = (data['STATUS'].sum(1) == 0)
        idx_output = np.logical_and(idx_mask, np.logical_and((data['PI'] > PI_min), (data['PI'] < PI_max)))
data_output = data[idx_output]
data_mask = data[np.logical_not(idx_mask)]
build_hist = lambda array: np.histogram2d(array['DET1Y'],array['DET1X'],360,[[0,360],[0,360]])[0]
output = build_hist(data_output)
mask = build_hist(data_mask)
mask = np.logical_or(mask,add_borders(output))
mask = np.logical_or(mask, self.get_bad_pix(file))
return output, mask
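    # Note: NuSTAR PI channels map to energy as E[keV] = 0.04*PI + 1.6, so the
    # default E_borders=[3, 20] keV selects events with 35 < PI < 460.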
    def get_bad_pix(self, file):
        # Project the BADPIX extensions (HDUs 3-6, one per detector chip) onto
        # DET1 coordinates via the pixpos calibration files, then dilate 5x5.
        output = np.zeros((360, 360))
        kernel = np.ones((5, 5))
for i in range(4):
pixpos_file = fits.getdata(f'D:\\Programms\\Jupyter\\Science\\Source_mask\\Pixpos\\nu{self.det}pixpos20100101v007.fits',i+1)
bad_pix_file = file[3+i].data.copy()
temp = np.zeros(len(pixpos_file),dtype=bool)
for x,y in zip(bad_pix_file['rawx'],bad_pix_file['rawy']):
temp = np.logical_or(temp, np.equal(pixpos_file['rawx'],x)*np.equal(pixpos_file['rawy'],y))
temp = pixpos_file[temp]
output += np.histogram2d(temp['REF_DET1Y'],temp['REF_DET1X'], 360, [[0,360],[0,360]])[0]
output = convolve2d(output, kernel, mode='same') >0
return output
    def wavdecomp(self, mode='atrous', thresh=False, iteration=1, occ_coeff=False):
        # THRESHOLD: a single number gives (thresh, thresh/2); a tuple gives
        # (detection, extension) thresholds; anything else disables thresholding.
        if type(thresh) is int:
            thresh_max, thresh_add = thresh, thresh/2
        elif type(thresh) is tuple:
            thresh_max, thresh_add = thresh
        else:
            thresh_max, thresh_add = 0, 0
        # INIT BASIC VARIABLES
        wavelet = globals()[mode]
        # max_level = int(np.ceil(np.log2(self.data.shape[0])))+1
        max_level = 8
        conv_out = np.zeros((max_level+1, self.data.shape[0], self.data.shape[1]))
        size = self.data.shape[0]
        # PREPARE ORIGINAL DATA FOR ANALYSIS: FILL THE HOLES + MIRROR + DETECTOR CORRECTION
        data = fill_poisson(self.data)
        if occ_coeff:
            data = data*self.get_coeff()
        data = mirror(data)
        data_bkg = data.copy()
        # ITERATIVELY CONDUCT WAVELET DECOMPOSITION
        for i in range(max_level):
            for num_iter in range(iteration):
                conv = fftconvolve(data, wavelet(i), mode='same')
                temp_out = data - conv
                # ERROR MAP CALCULATION
                if thresh_max != 0:
                    sig = sigma(mode, i)
                    bkg = fftconvolve(data_bkg, wavelet(i), mode='same')
                    bkg[bkg < 0] = 0
                    # err = (1+np.sqrt(bkg/sig**2 + 0.75))*sig**3
                    err = (1+np.sqrt(bkg+0.75))*sig
                    significant = (np.abs(temp_out) > thresh_max*err)[size:2*size, size:2*size]
                    # significant = (temp_out > thresh_max*err)[size:2*size,size:2*size]
                    if thresh_add != 0:
                        # Grow the detections: keep adding neighbouring pixels
                        # that pass the lower threshold until nothing changes.
                        add_significant = (np.abs(temp_out) > thresh_add*err)[size:2*size, size:2*size]
                        # add_significant = (temp_out > thresh_add*err)[size:2*size,size:2*size]
                        adj = adjecent(significant)
                        add_condition = np.logical_and(add_significant[adj[0], adj[1]], np.logical_not(significant[adj[0], adj[1]]))
                        while (add_condition).any():
                            to_add = adj[0][add_condition], adj[1][add_condition]
                            significant[to_add[0], to_add[1]] = True
                            adj = adjecent(significant)
                            add_condition = np.logical_and(add_significant[adj[0], adj[1]], np.logical_not(significant[adj[0], adj[1]]))
                            # add_condition = np.logical_and(np.abs(temp_out)[adj[0],adj[1]] >= thresh_add*err[adj[0],adj[1]], np.logical_not(significant)[adj[0],adj[1]])
                    temp_out[size:2*size, size:2*size][np.logical_not(significant)] = 0
                # WRITING THE WAVELET DECOMP LAYER
                if temp_out[size:2*size, size:2*size].sum() == 0:
                    break
                conv_out[i] = temp_out[size:2*size, size:2*size]
                conv_out[i][conv_out[i] < 0] = 0  # keep only positive flux so layers can be summed safely
                # conv_out[i] = significant
                data = conv
        conv_out[max_level] = conv[size:2*size, size:2*size]
        return conv_out
# %%
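# %%
# Usage sketch (the event-file path is a hypothetical example, not part of
# this module): build an observation, decompose it, and combine the detail
# layers into a source mask the same way get_region_archive_table.process does.
# obs = Observation(r'E:\Archive\00\0\nu80002092008A01_cl.evt')
# layers = obs.wavdecomp('gauss', thresh=(5, 3), occ_coeff=True)
# src_mask = layers[2:-1].sum(0) > 0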