Merge pull request #5 from Andreyousan/code_trimming

Code trimming
This commit is contained in:
Andrey Mukhin 2022-09-19 15:56:50 +03:00 committed by GitHub
commit 319a14f2a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 53826 additions and 398 deletions

MANIFEST.in Normal file

@@ -0,0 +1,2 @@
include nuwavsource/pixpos/nuApixpos20100101v007.fits
include nuwavsource/pixpos/nuBpixpos20100101v007.fits

README.md

@@ -1 +1,49 @@
# wavsource_nustar
# nuwavsource
This package detects sources in NuSTAR observations and generates a mask excluding the signal from sources of any kind.
Additionally, it generates a table containing:
Useful data about the observation:
1. OBS_ID
2. Detector
3. Coordinates in equatorial (ra, dec) and galactic (lon, lat) systems
4. Time of the observation in seconds
5. Exposure
Useful algorithm-related data:
6. Average count rate in the unmasked area
7. Fraction of the area that remains unmasked
8. A statistical metric[1] of the count distribution before and after masking the detected sources
9. Root mean square of counts in the unmasked area
## Installation
This package requires Python 3.
To install the package, run
```bash
pip install nuwavsource
```
## Usage
To use the package in your project, import it:
```python
from nuwavsource import nuwavsource
```
You can process a cl.evt file by creating an Observation object:
```python
obs = nuwavsource.Observation(path_to_evt_file)
```
Additionally, the energy band (in keV) from which to select events can be passed as an argument. The default is [3, 20].
```python
obs = nuwavsource.Observation(path_to_evt_file, E_borders=[E_min, E_max])
```
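
The package can also run the wavelet decomposition and the full processing pipeline directly. Below is a minimal sketch, assuming the `wavdecomp` and `process_folder` signatures and the `test.fits` output name found in this release; the paths are placeholders:
```python
from nuwavsource import nuwavsource
from astropy.table import Table

# Decompose a single observation into wavelet layers and build a source mask;
# thresh=(5, 2) sets the general and additional significance thresholds.
obs = nuwavsource.Observation(path_to_evt_file)
layers = obs.wavdecomp('gauss', thresh=(5, 2), occ_coeff=True)
mask = layers[2:-1].sum(0) > 0

# Or process every *_cl.evt file under a folder and collect the results:
nuwavsource.process_folder(input_folder='path/to/data',
                           start_new_file='y',
                           fits_folder='path/to/output',
                           thresh=(5, 2))
result = Table.read('path/to/output/test.fits')  # the table described above
```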


@@ -1,175 +0,0 @@
# %%
from get_region_pack import *
import numpy as np
from pandas import DataFrame, read_csv
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from multiprocessing import get_context, cpu_count
import warnings
import time
import os
warnings.filterwarnings('ignore')
# %%
def poisson_divider(array):
sub_arrays = np.zeros((4,180,180))
for i in range(2):
for j in range(2):
sub_arrays[i+2*j] = array[180*i:180*(i+1),180*j:180*(j+1)].filled(-1000)
pix_sum = 0
for layer in sub_arrays:
_masked_array = np.ma.masked_less(layer,0)
pix_sum += ((_masked_array-_masked_array.mean())**2/_masked_array.mean()).sum()
pix_sum /= np.logical_not(array.mask).sum()
return pix_sum
def process(argument):
idx, obs_name = argument
bin_num = 6
try:
obs = Observation(obs_name)
sky_coord = SkyCoord(ra=obs.ra*u.deg,dec=obs.dec*u.deg,frame='fk5').transform_to('galactic')
lon, lat = sky_coord.l.value, sky_coord.b.value
rem_signal, rem_area, poiss_comp, rms = np.zeros((4,2**bin_num))
region = np.zeros(obs.data.shape, dtype=bool)
rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
masked_obs = np.ma.masked_array(obs.data, mask = region)
good_lvl = np.zeros(bin_num,dtype=bool)
good_idx = 0
if obs.exposure > 1000:
wav_obs = obs.wavdecomp('gauss',(5,3),occ_coeff=True)
occ_coeff = obs.get_coeff()
for idx, lvl in enumerate(binary_array(bin_num)):
try:
# region = np.array([layer>0 for layer in wav_obs[2:-1][lvl]]).sum(0).astype(bool)
region = wav_obs[2:-1][lvl].sum(0)>0
except ValueError:
region = np.zeros(obs.data.shape,dtype=bool)
masked_obs = np.ma.masked_array(obs.data, mask = region)*occ_coeff
rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
rem_signal[idx] = 1-obs.data[region].sum()/obs.data.sum()
rem_area[idx] = 1 - rem_region.sum()/np.logical_not(obs.data.mask).sum()
# poiss_comp[idx] = poisson_divider(masked_obs)
poiss_comp[idx] = np.mean((masked_obs-masked_obs.mean())**2/masked_obs.mean())
rms[idx] = np.sqrt(((masked_obs-masked_obs.mean())**2).mean())
parameter = lambda idx: ((poiss_comp[idx])**2+((1-rem_area[idx])*0.5)**2)
if (parameter(idx)<parameter(good_idx)):
good_idx = idx
good_lvl = lvl
try:
# region = np.array([layer>0 for layer in wav_obs[2:-1][lvl]]).sum(0).astype(bool)
region = wav_obs[2:-1][good_lvl].sum(0)>0
except ValueError:
region = np.zeros(obs.data.shape,dtype=bool)
masked_obs = np.ma.masked_array(obs.data, mask = region)
rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
to_table = [obs.obs_id,
obs.det,
obs.ra,
obs.dec,
lon,
lat,
obs.time_start,
obs.exposure,
masked_obs.mean()/obs.exposure, #count rate
1 - rem_region.sum()/np.logical_not(obs.data.mask).sum(), #rem_area
poiss_comp[good_idx],
poiss_comp[0],
rms[good_idx]
]
else:
to_table = [obs.obs_id,
obs.det,
obs.ra,
obs.dec,
lon,
lat,
obs.time_start,
obs.exposure,
-1,
-1, #rem_signal
-1, #rem_area
-1,
-1,
-1
]
return to_table, region.astype(int)
except TypeError:
return obs_name, np.zeros((360,360))
#%%
if __name__ == '__main__':
#DIALOGUE
print('Enter path to the input folder')
input_folder = input()
obs_list = get_link_list(input_folder,sort_list = True)[:]
print('Create new file for this processing? y/n')
continue_old = input()
if continue_old == 'y':
start_new = True
elif continue_old == 'n':
start_new = False
else:
print('Cannot interpret input, closing script')
raise SystemExit(0)
print(f'Enter path to the output folder')
fits_folder = input()
region_folder = f'{fits_folder}\\Region'
#INIT ALL NECESSARY FILES AND VARIABLES
start = time.perf_counter()
processing = True
group_size = 50
os.makedirs(region_folder,exist_ok = True)
#FILTERING BY THE FILE SIZE
print(f'Finished scanning folders. Found {len(obs_list)} observations.')
table = {
'obs_id':[], 'detector':[], 'ra':[], 'dec':[], 'lon':[], 'lat':[], 't_start':[], 'exposure':[],
'count_rate':[], 'remaining_area':[], 'poisson_chi2':[], 'poisson_chi2_full':[], 'rms':[]
}
if start_new:
out_table = DataFrame(table)
out_table.to_csv(f'{fits_folder}\\test.csv')
out_table.to_csv(f'{fits_folder}\\test_skipped.csv')
#FILTERING OUT PROCESSED OBSERVATIONS
already_processed_list = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
already_skipped_list = read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str})
already_processed = (already_processed_list['obs_id'].astype(str)+already_processed_list['detector']).values
already_skipped = (already_skipped_list['obs_id'].astype(str)+already_skipped_list['detector']).values
obs_list_names = [curr[curr.index('nu')+2:curr.index('_cl.evt')-2] for curr in obs_list]
not_processed = np.array([(curr not in already_processed) for curr in obs_list_names])
not_skipped = np.array([(curr not in already_skipped) for curr in obs_list_names])
obs_list = obs_list[np.logical_and(not_processed,not_skipped)]
print(f'Removed already processed observations. {len(obs_list)} observations remain.')
#START PROCESSING
if processing:
print('Started processing...')
num = 0
for group_idx in range(len(obs_list)//group_size+1):
print(f'Started group {group_idx}')
group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1),len(obs_list))]
max_size = np.array([stat(file).st_size/2**20 for file in group_list]).max()
process_num = cpu_count() if max_size<50 else (cpu_count()//2 if max_size<200 else (cpu_count()//4 if max_size<1000 else 1))
print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes")
with get_context('spawn').Pool(processes=process_num) as pool:
for result,region in pool.imap(process,enumerate(group_list)):
if type(result) is np.str_:
obs_id = result[result.index('nu'):result.index('_cl.evt')]
print(f'{num:>3} is skipped. File {obs_id}')
num +=1
continue
for key,value in zip(table.keys(),result):
table[key] = [value]
if table['exposure'][0] < 1000:
print(f'{num:>3} {str(result[0])+result[1]} is skipped. Exposure < 1000')
DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False)
num +=1
continue
DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False)
fits.writeto(f'{region_folder}\\{str(result[0])+result[1]}_region.fits', region, overwrite= True)
print(f'{num:>3} {str(result[0])+result[1]} is written.')
num +=1
print('Converting generated csv to fits file...')
print(f'Current time in: {(time.perf_counter()-start):.2f}')
print(f'Processed {num/len(obs_list)*100:.2f} percent')
csv_file = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
Table.from_pandas(csv_file).write(f'{fits_folder}\\test.fits',overwrite=True)
print(f'Finished writing: {time.perf_counter()-start}')
# %%

get_region_pack.py

@@ -1,222 +0,0 @@
# %%
import numpy as np
import itertools
from os import stat
from scipy.signal import fftconvolve, convolve2d
from astropy.io import fits
from astropy.wcs import WCS
from glob import glob
# %%
def binary_array(num):
variants = [[0,1],]*num
out = np.zeros((2**num,num),bool)
for idx, level in enumerate(itertools.product(*variants)):
out[idx] = np.array(level,dtype=bool)
return out
def create_array(filename,mode='Sky'):
temp = fits.getdata(filename,1)
if mode == 'Sky':
return np.histogram2d(temp['Y'],temp['X'],1000,[[0, 1000], [0, 1000]])[0]
if mode == 'Det':
return np.histogram2d(temp['DET1Y'],temp['DET1X'],360,[[0, 360], [0, 360]])[0]
def get_wcs(file):
header = file[1].header
wcs = WCS({
'CTYPE1': header['TCTYP38'], 'CTYPE2': header['TCTYP39'],
'CUNIT1': header['TCUNI38'], 'CUNIT2': header['TCUNI39'],
'CDELT1': header['TCDLT38'], 'CDELT2': header['TCDLT39'],
'CRPIX1': header['TCRPX38'], 'CRPIX2': header['TCRPX39'],
'CRVAL1': header['TCRVL38'], 'CRVAL2': header['TCRVL39'],
'NAXIS1': header['TLMAX38'], 'NAXIS2': header['TLMAX39']
})
return wcs
def get_link_list(folder, sort_list=True):
links = glob(f'{folder}\\**\\*_cl.evt',recursive=True)
if sort_list:
sorted_list = sorted(links, key=lambda x: stat(x).st_size)
return np.array(sorted_list)
else:
return np.array(links)
def atrous(level = 0, resize = False, max_size = 1001):
base = 1/256*np.array([
[1, 4, 6, 4, 1],
[4, 16, 24, 16, 4],
[6, 24, 36, 24, 6],
[4, 16, 24, 16, 4],
[1, 4, 6, 4, 1],
])
size = 2**level * (base.shape[0]-1)+1
output = np.zeros((size,size))
output[::2**level, ::2**level] = base
if resize:
output = np.pad(output, pad_width=2**(level+1))
if output.shape[0]>max_size:
return output[(size-1)//2-(max_size-1)//2:(size-1)//2+(max_size-1)//2+1, (size-1)//2-(max_size-1)//2:(size-1)//2+(max_size-1)//2+1]
return output
def gauss(level=0, resize=False, max_size = 1000):
size = min(5*2**(level+1)+1, max_size)
sigma = 2**(level)
A = 1/(2*np.pi*sigma**2)**0.5
x = A*np.exp( (-(np.arange(size)-(size-1)//2)**2)/(2*sigma**2))
out = np.multiply.outer(x,x)
return out
def adjecent(array):
grid = np.array([
[1,1,1],
[1,0,1],
[1,1,1]
])
output = fftconvolve(array,grid,mode='same') >= 0.5
try:
output = np.logical_and(np.logical_and(output, np.logical_not(array)),np.logical_not(array.mask))
except AttributeError:
output = np.logical_and(output, np.logical_not(array))
output = np.argwhere(output == True)
return output[:,0], output[:,1]
def add_borders(array,middle=True):
mask = np.zeros(array.shape)
datax, datay = np.any(array>0,0), np.any(array>0,1)
#Add border masks
x_min, y_min = np.argmax(datax), np.argmax(datay)
x_max, y_max = len(datax) - np.argmax(datax[::-1]), len(datay) - np.argmax(datay[::-1])
mask[y_min:y_max,x_min:x_max] = True
if middle is True:
mask[176:191,:] = False
mask[:,176:191] = False
mask = np.logical_not(mask)
return mask
def fill_poisson(array, size_input=32):
if not(isinstance(array,np.ma.MaskedArray)):
print('No mask found')
return array
size = size_input
output = array.data.copy()
mask = array.mask.copy()
while mask.sum()>1:
kernel = np.ones((size,size))/size**2
coeff = fftconvolve(np.logical_not(mask),kernel,mode='same')
mean = fftconvolve(output,kernel,mode='same')
idx = np.where(np.logical_and(mask,coeff>0.1))
output[idx] = np.random.poisson(np.abs(mean[idx]/coeff[idx]))
mask[idx] = False
size *= 2
return output
def fill_mean(array,size_input=3):
size = size_input
if not(isinstance(array,np.ma.MaskedArray)):
print('No mask found')
return array
output = array.filled(0)
for i,j in zip(*np.where(array.mask)):
output[i,j] = array[max(0,i-size):min(array.shape[0]-1,i+size+1),max(0,j-size):min(array.shape[1]-1,j+size+1)].mean()
while np.isnan(output).any():
size += 5
for i,j in zip(*np.where(np.isnan(output))):
output[i,j] = array[max(0,i-size):min(array.shape[0]-1,i+size+1),max(0,j-size):min(array.shape[1]-1,j+size+1)].mean()
return output
def mirror(array):
size = array.shape[0]
output = np.tile(array,(3,3))
output[0:size] = np.flipud(output[0:size])
output[2*size:3*size] = np.flipud(output[2*size:3*size])
output[:,0:size] = np.fliplr(output[:,0:size])
output[:,2*size:3*size] = np.fliplr(output[:,2*size:3*size])
return output
class Observation:
def __init__(self, file_name, E_borders=[3,20]):
self.filename = file_name
self.name = file_name[file_name.find('nu'):].replace('_cl.evt','')
with fits.open(file_name) as file:
self.obs_id = file[0].header['OBS_ID']
self.ra = file[0].header['RA_NOM']
self.dec = file[0].header['DEC_NOM']
self.time_start = file[0].header['TSTART']
self.header = file[0].header
self.det = self.header['INSTRUME'][-1]
self.wcs = get_wcs(file)
self.data = np.ma.masked_array(*self.get_data(file,E_borders))
self.hard_mask = add_borders(self.data.data,middle=False)
shift = int((360-64)/2)
self.model = (fits.getdata(f'D:\Programms\Jupyter\Science\Source_mask\Model/det1_fpm{self.det}.cxb.fits',0)
*(180/np.pi/10150)**2)[shift:360+shift,shift:360+shift]
self.exposure = self.header['EXPOSURE']
def get_coeff(self):
coeff = np.array([0.977,0.861,1.163,1.05]) if self.det=='A' else np.array([1.004,0.997,1.025,0.979])
resized_coeff = (coeff).reshape(2,2).repeat(180,0).repeat(180,1)
return resized_coeff
def get_data(self, file, E_borders=[3,20]):
PI_min, PI_max = (np.array(E_borders)-1.6)/0.04
data = file[1].data.copy()
idx_mask = (data['STATUS'].sum(1) == 0)
idx_output = np.logical_and(idx_mask,np.logical_and((data['PI']>PI_min),(data['PI']<PI_max)))
data_output = data[idx_output]
data_mask = data[np.logical_not(idx_mask)]
build_hist = lambda array: np.histogram2d(array['DET1Y'],array['DET1X'],360,[[0,360],[0,360]])[0]
output = build_hist(data_output)
mask = build_hist(data_mask)
mask = np.logical_or(mask,add_borders(output))
mask = np.logical_or(mask, self.get_bad_pix(file))
return output, mask
def get_bad_pix(self, file):
output = np.zeros((360,360))
kernel = np.ones((5,5))
for i in range(4):
pixpos_file = fits.getdata(f'D:\\Programms\\Jupyter\\Science\\Source_mask\\Pixpos\\nu{self.det}pixpos20100101v007.fits',i+1)
bad_pix_file = file[3+i].data.copy()
temp = np.zeros(len(pixpos_file),dtype=bool)
for x,y in zip(bad_pix_file['rawx'],bad_pix_file['rawy']):
temp = np.logical_or(temp, np.equal(pixpos_file['rawx'],x)*np.equal(pixpos_file['rawy'],y))
temp = pixpos_file[temp]
output += np.histogram2d(temp['REF_DET1Y'],temp['REF_DET1X'], 360, [[0,360],[0,360]])[0]
output = convolve2d(output, kernel, mode='same') > 0
return output
def wavdecomp(self, mode = 'atrous', thresh=False,occ_coeff = False):
#THRESHOLD
if type(thresh) is int: thresh_max, thresh_add = thresh, thresh/2
elif type(thresh) is tuple: thresh_max, thresh_add = thresh
#INIT NEEDED VARIABLES
wavelet = globals()[mode]
max_level = 8
conv_out = np.zeros((max_level+1,self.data.shape[0],self.data.shape[1]))
size = self.data.shape[0]
#PREPARE ORIGINAL DATA FOR ANALYSIS: FILL THE HOLES + MIRROR + DETECTOR CORRECTION
data = fill_poisson(self.data)
if occ_coeff: data = data*self.get_coeff()
data = mirror(data)
data_bkg = data.copy()
#ITERATIVELY CONDUCT WAVELET DECOMPOSITION
for i in range(max_level):
conv = fftconvolve(data,wavelet(i),mode='same')
temp_out = data-conv
#ERRORMAP CALCULATION
if thresh_max != 0:
sig = ((wavelet(i)**2).sum())**0.5
bkg = fftconvolve(data_bkg, wavelet(i),mode='same')
bkg[bkg<0] = 0
# err = (1+np.sqrt(bkg/sig**2 + 0.75))*sig**3
err = (1+np.sqrt(bkg+0.75))*sig
# significant = (np.abs(temp_out)> thresh_max*err)[size:2*size,size:2*size]
significant = (temp_out > thresh_max*err)[size:2*size,size:2*size]
if thresh_add != 0:
# add_significant = (np.abs(temp_out)> thresh_add*err)[size:2*size,size:2*size]
add_significant = (temp_out > thresh_add*err)[size:2*size,size:2*size]
adj = adjecent(significant)
add_condition = np.logical_and(add_significant[adj[0],adj[1]],np.logical_not(significant[adj[0],adj[1]]))
while (add_condition).any():
to_add = adj[0][add_condition], adj[1][add_condition]
significant[to_add[0],to_add[1]] = True
adj = adjecent(significant)
add_condition = np.logical_and(add_significant[adj[0],adj[1]],np.logical_not(significant[adj[0],adj[1]]))
# add_condition = np.logical_and(np.abs(temp_out)[adj[0],adj[1]] >= thresh_add*err[adj[0],adj[1]], np.logical_not(significant)[adj[0],adj[1]])
temp_out[size:2*size,size:2*size][np.logical_not(significant)] = 0
#WRITING THE WAVELET DECOMP LAYER
conv_out[i] = +temp_out[size:2*size,size:2*size]
conv_out[i][conv_out[i]<0]=0 #leave only positive data to prevent problems while summing layers
data = conv
conv_out[max_level] = conv[size:2*size,size:2*size]
return conv_out
# %%

nuwavsource/__init__.py Normal file

@@ -0,0 +1 @@
name = 'nuwavsource'

Binary file not shown.

Binary file not shown.

nuwavsource/nuwavsource.py Normal file

@@ -0,0 +1,441 @@
# %%
import numpy as np
import itertools
from pandas import DataFrame, read_csv
from astropy.table import Table, unique
from astropy.coordinates import SkyCoord
from astropy import units as u
from multiprocessing import get_context, cpu_count
from time import perf_counter
from os import stat, makedirs
from os.path import dirname
from scipy.signal import fftconvolve, convolve2d
from astropy.io import fits
from astropy.wcs import WCS
from glob import glob
from warnings import filterwarnings
filterwarnings('ignore')
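# Recursively collect paths to cleaned event files (*_cl.evt), optionally sorted by file size.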
def get_link_list(folder, sort_list=True):
links = glob(f'{folder}\\**\\*_cl.evt', recursive=True)
if sort_list:
sorted_list = sorted(links, key=lambda x: stat(x).st_size)
return np.array(sorted_list)
else:
return np.array(links)
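# Enumerate all 2**num on/off combinations of num boolean flags (used to toggle wavelet layers).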
def binary_array(num):
variants = [[0, 1], ]*num
out = np.zeros((2**num, num), bool)
for idx, level in enumerate(itertools.product(*variants)):
out[idx] = np.array(level, dtype=bool)
return out
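# Bin an event list into a 2D image in sky (X/Y) or detector (DET1X/DET1Y) coordinates.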
def create_array(filename, mode='Sky'):
temp = fits.getdata(filename, 1)
if mode == 'Sky':
return np.histogram2d(temp['Y'],
temp['X'],
1000,
[[0, 1000], [0, 1000]])[0]
if mode == 'Det':
return np.histogram2d(temp['DET1Y'],
temp['DET1X'],
360,
[[0, 360], [0, 360]])[0]
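# Build a WCS from the TCTYP/TCRVL/... keywords attached to event-table columns 38/39 (sky X/Y).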
def get_wcs(file):
header = file[1].header
wcs = WCS({
'CTYPE1': header['TCTYP38'], 'CTYPE2': header['TCTYP39'],
'CUNIT1': header['TCUNI38'], 'CUNIT2': header['TCUNI39'],
'CDELT1': header['TCDLT38'], 'CDELT2': header['TCDLT39'],
'CRPIX1': header['TCRPX38'], 'CRPIX2': header['TCRPX39'],
'CRVAL1': header['TCRVL38'], 'CRVAL2': header['TCRVL39'],
'NAXIS1': header['TLMAX38'], 'NAXIS2': header['TLMAX39']
})
return wcs
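# B3-spline kernel of the a trous wavelet transform; each level upsamples the 5x5 base
# by inserting 2**level - 1 zeros between taps.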
def atrous(level=0, max_size=1001):
base = 1/256*np.array([
[1, 4, 6, 4, 1],
[4, 16, 24, 16, 4],
[6, 24, 36, 24, 6],
[4, 16, 24, 16, 4],
[1, 4, 6, 4, 1],
])
size = 2**level * (base.shape[0]-1)+1
output = np.zeros((size, size))
output[::2**level, ::2**level] = base
if output.shape[0] > max_size:
return output[(size-1)//2-(max_size-1)//2:(size-1)//2+(max_size-1)//2+1,
(size-1)//2-(max_size-1)//2:(size-1)//2+(max_size-1)//2+1]
return output
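# Separable Gaussian kernel with sigma = 2**level, truncated at max_size.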
def gauss(level=0, max_size=1000):
size = min(5*2**(level+1)+1, max_size)
sigma = 2**(level)
A = 1/(2*np.pi*sigma**2)**0.5
x = A*np.exp((-(np.arange(size)-(size-1)//2)**2)/(2*sigma**2))
out = np.multiply.outer(x, x)
return out
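# Return coordinates of unmasked pixels that border (8-connectivity) the True pixels of array.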
def adjecent(array):
grid = np.array([
[1, 1, 1],
[1, 0, 1],
[1, 1, 1]
])
output = fftconvolve(array, grid, mode='same') >= 0.5
try:
output = np.logical_and(np.logical_and(output, np.logical_not(array)),
np.logical_not(array.mask))
except AttributeError:
output = np.logical_and(output, np.logical_not(array))
output = np.argwhere(output == True)
return output[:, 0], output[:, 1]
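# Mask everything outside the bounding box of the positive data; with middle=True the
# central detector-gap strip (rows/columns 176:191) is masked as well.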
def add_borders(array, middle=True):
mask = np.zeros(array.shape)
datax, datay = np.any(array > 0, 0), np.any(array > 0, 1)
# Add border masks
x_min, y_min = np.argmax(datax), np.argmax(datay)
x_max = len(datax) - np.argmax(datax[::-1])
y_max = len(datay) - np.argmax(datay[::-1])
mask[y_min:y_max, x_min:x_max] = True
if middle is True:
mask[176:191, :] = False
mask[:, 176:191] = False
mask = np.logical_not(mask)
return mask
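# Fill masked pixels with Poisson realizations of the local mean, estimated with a
# box kernel whose size doubles until the masked pixels are filled.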
def fill_poisson(array, size_input=32):
if not (isinstance(array, np.ma.MaskedArray)):
print('No mask found')
return array
size = size_input
output = array.data.copy()
mask = array.mask.copy()
while mask.sum() > 1:
kernel = np.ones((size, size))/size**2
coeff = fftconvolve(np.logical_not(mask), kernel, mode='same')
mean = fftconvolve(output, kernel, mode='same')
idx = np.where(np.logical_and(mask, coeff > 0.1))
output[idx] = np.random.poisson(np.abs(mean[idx]/coeff[idx]))
mask[idx] = False
size *= 2
return output
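# Tile the image 3x3 with mirrored copies to suppress edge effects in the convolutions.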
def mirror(array):
size = array.shape[0]
output = np.tile(array, (3, 3))
output[0:size] = np.flipud(output[0:size])
output[2*size:3*size] = np.flipud(output[2*size:3*size])
output[:, 0:size] = np.fliplr(output[:, 0:size])
output[:, 2*size:3*size] = np.fliplr(output[:, 2*size:3*size])
return output
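# Container for a single NuSTAR observation: detector image, mask, WCS and header metadata.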
class Observation:
def __init__(self, file_name, E_borders=[3,20]):
self.filename = file_name
self.name = file_name[file_name.find('nu'):].replace('_cl.evt','')
with fits.open(file_name) as file:
self.obs_id = file[0].header['OBS_ID']
self.ra = file[0].header['RA_NOM']
self.dec = file[0].header['DEC_NOM']
self.time_start = file[0].header['TSTART']
self.header = file[0].header
self.det = self.header['INSTRUME'][-1]
self.wcs = get_wcs(file)
self.data = np.ma.masked_array(*self.get_data(file, E_borders))
self.hard_mask = add_borders(self.data.data, middle=False)
self.exposure = self.header['EXPOSURE']
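# Per-quadrant correction coefficients for FPMA/FPMB, expanded to the full 360x360 grid.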
def get_coeff(self):
coeff = np.array([0.977, 0.861, 1.163, 1.05]) if self.det == 'A' else np.array([1.004, 0.997, 1.025, 0.979])
resized_coeff = (coeff).reshape(2, 2).repeat(180, 0).repeat(180, 1)
return resized_coeff
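# Build the detector image and its mask (flagged events, borders, bad pixels) from the event list.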
def get_data(self, file, E_borders=[3, 20]):
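# NuSTAR PI channel to energy conversion: E[keV] = 0.04*PI + 1.6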
PI_min, PI_max = (np.array(E_borders)-1.6)/0.04
data = file[1].data.copy()
idx_mask = (data['STATUS'].sum(1) == 0)
idx_output = np.logical_and(idx_mask, np.logical_and((data['PI'] > PI_min), (data['PI'] < PI_max)))
data_output = data[idx_output]
data_mask = data[np.logical_not(idx_mask)]
build_hist = lambda array: np.histogram2d(array['DET1Y'], array['DET1X'], 360, [[0, 360], [0, 360]])[0]
output = build_hist(data_output)
mask = build_hist(data_mask)
mask = np.logical_or(mask, add_borders(output))
mask = np.logical_or(mask, self.get_bad_pix(file))
return output, mask
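# Rasterize the BADPIX extensions onto DET1 coordinates via the pixpos tables
# and dilate the result with a 5x5 kernel.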
def get_bad_pix(self, file):
output = np.zeros((360, 360))
kernel = np.ones((5, 5))
for i in range(4):
current_dir = dirname(__file__)
pixpos_file = fits.getdata(f'{current_dir}\\pixpos\\nu{self.det}pixpos20100101v007.fits',i+1)
bad_pix_file = file[3+i].data.copy()
temp = np.zeros(len(pixpos_file), dtype=bool)
for x, y in zip(bad_pix_file['rawx'], bad_pix_file['rawy']):
temp = np.logical_or(temp, np.equal(pixpos_file['rawx'], x)*np.equal(pixpos_file['rawy'], y))
temp = pixpos_file[temp]
output += np.histogram2d(temp['REF_DET1Y'], temp['REF_DET1X'], 360, [[0, 360],[0, 360]])[0]
output = convolve2d(output, kernel, mode='same') > 0
return output
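# Wavelet decomposition of the detector image: returns max_level detail layers plus the
# final smoothed approximation. thresh may be an int (additional threshold = thresh/2)
# or a (general, additional) tuple.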
def wavdecomp(self, mode='gauss', thresh=False, occ_coeff=False):
# THRESHOLD
if type(thresh) is int:
thresh_max, thresh_add = thresh, thresh/2
elif type(thresh) is tuple:
thresh_max, thresh_add = thresh
# INIT NEEDED VARIABLES
wavelet = globals()[mode]
max_level = 8
conv_out = np.zeros((max_level+1, self.data.shape[0], self.data.shape[1]))
size = self.data.shape[0]
# PREPARE ORIGINAL DATA FOR ANALYSIS: FILL THE HOLES + MIRROR + DETECTOR CORRECTION
data = fill_poisson(self.data)
if occ_coeff:
data = data*self.get_coeff()
data = mirror(data)
data_bkg = data.copy()
# ITERATIVELY CONDUCT WAVELET DECOMPOSITION
for i in range(max_level):
conv = fftconvolve(data, wavelet(i), mode='same')
temp_out = data-conv
# ERRORMAP CALCULATION
if thresh_max != 0:
sig = ((wavelet(i)**2).sum())**0.5
bkg = fftconvolve(data_bkg, wavelet(i), mode='same')
bkg[bkg < 0] = 0
err = (1+np.sqrt(bkg+0.75))*sig
significant = (temp_out > thresh_max*err)[size:2*size, size:2*size]
if thresh_add != 0:
add_significant = (temp_out > thresh_add*err)[size:2*size, size:2*size]
adj = adjecent(significant)
add_condition = np.logical_and(add_significant[adj[0], adj[1]],
np.logical_not(significant[adj[0], adj[1]]))
while (add_condition).any():
to_add = adj[0][add_condition], adj[1][add_condition]
significant[to_add[0], to_add[1]] = True
adj = adjecent(significant)
add_condition = np.logical_and(add_significant[adj[0], adj[1]],
np.logical_not(significant[adj[0],adj[1]]))
temp_out[size:2*size, size:2*size][np.logical_not(significant)] = 0
# WRITING THE WAVELET DECOMP LAYER
conv_out[i] = +temp_out[size:2*size, size:2*size]
# DISCARDING NEGATIVE COMPONENTS OF WAVELETS TO MAKE MASK BY SUMMING WAVELET LAYERS
conv_out[i][conv_out[i] < 0] = 0
data = conv
conv_out[max_level] = conv[size:2*size, size:2*size]
return conv_out
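# Convert a DET1 region mask into per-quadrant RAWX/RAWY pixel tables packed in a FITS HDUList.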
def region_to_raw(self, region):
x_region, y_region = np.where(region)
tables = []
for i in range(4):
current_dir = dirname(__file__)
pixpos = Table(fits.getdata(f'{current_dir}\\pixpos\\nu{self.det}pixpos20100101v007.fits', i+1))
pixpos = pixpos[pixpos['REF_DET1X'] != -1]
test = np.zeros(len(pixpos['REF_DET1X']), dtype=bool)
for idx, (x, y) in enumerate(zip(pixpos['REF_DET1X'], pixpos['REF_DET1Y'])):
test[idx] = np.logical_and(np.equal(x, x_region), np.equal(y, y_region)).any()
table = Table({'RAWX': pixpos['RAWX'][test], 'RAWY': pixpos['RAWY'][test]})
if not table:
tables.append(table)
else:
tables.append(unique(table))
hdu_list = fits.HDUList([
fits.PrimaryHDU(),
fits.table_to_hdu(tables[0]),
fits.table_to_hdu(tables[1]),
fits.table_to_hdu(tables[2]),
fits.table_to_hdu(tables[3]),
])
return hdu_list
def process(args):
"""
Creates a source mask using wavelet decomposition and returns statistics and metadata about the passed observation.
args must contain two elements: the path to the event file and the threshold, e.g. ('D:\Data\obs_cl.evt', (5, 2))
"""
obs_path, thresh = args
bin_num = 6
try:
obs = Observation(obs_path)
sky_coord = SkyCoord(ra=obs.ra*u.deg, dec=obs.dec*u.deg, frame='fk5').transform_to('galactic')
lon, lat = sky_coord.l.value, sky_coord.b.value
rem_signal, rem_area, poiss_comp, rms = np.zeros((4, 2**bin_num))
region = np.zeros(obs.data.shape, dtype=bool)
region_raw = -1
rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
masked_obs = np.ma.masked_array(obs.data, mask=region)
good_lvl = np.zeros(bin_num, dtype=bool)
good_idx = 0
if obs.exposure > 1000:
wav_obs = obs.wavdecomp('gauss', thresh, occ_coeff=True)
occ_coeff = obs.get_coeff()
for idx, lvl in enumerate(binary_array(bin_num)):
try:
region = wav_obs[2:-1][lvl].sum(0) > 0
except ValueError:
region = np.zeros(obs.data.shape, dtype=bool)
masked_obs = np.ma.masked_array(obs.data, mask=region)*occ_coeff
rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
rem_signal[idx] = 1-obs.data[region].sum()/obs.data.sum()
rem_area[idx] = 1 - rem_region.sum()/np.logical_not(obs.data.mask).sum()
poiss_comp[idx] = np.mean((masked_obs-masked_obs.mean())**2/masked_obs.mean())
rms[idx] = np.sqrt(((masked_obs-masked_obs.mean())**2).mean())
parameter = lambda idx: ((poiss_comp[idx])**2+((1-rem_area[idx])*0.5)**2)
if (parameter(idx) < parameter(good_idx)):
good_idx = idx
good_lvl = lvl
try:
region = wav_obs[2:-1][good_lvl].sum(0) > 0
if region.sum() > 0:
region_raw = obs.region_to_raw(region.astype(int))
except ValueError:
region = np.zeros(obs.data.shape, dtype=bool)
masked_obs = np.ma.masked_array(obs.data, mask=region)
rem_region = np.logical_and(region, np.logical_not(obs.data.mask))
to_table = [obs.obs_id,
obs.det,
obs.ra,
obs.dec,
lon,
lat,
obs.time_start,
obs.exposure,
masked_obs.mean()/obs.exposure, # count rate
1 - rem_region.sum()/np.logical_not(obs.data.mask).sum(), # rem_area
poiss_comp[good_idx],
poiss_comp[0],
rms[good_idx]
]
else:
to_table = [obs.obs_id,
obs.det,
obs.ra,
obs.dec,
lon,
lat,
obs.time_start,
obs.exposure,
-1,
-1, # rem_signal
-1, # rem_area
-1,
-1,
-1
]
return to_table, region.astype(int), region_raw
except TypeError:
return obs_path, -1, -1
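# Batch driver: collect observations under input_folder, process them in a pool,
# and append per-observation rows to the csv/fits output tables.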
def process_folder(input_folder=None, start_new_file=None, fits_folder=None, thresh=None):
# DIALOGUE
if not (input_folder):
print('Enter path to the input folder')
input_folder = input()
if not (start_new_file):
print('Create new file for this processing? y/n')
start_new_file = input()
if start_new_file == 'y':
start_new = True
elif start_new_file == 'n':
start_new = False
else:
print('Cannot interpret input, closing script')
raise SystemExit(0)
if not (fits_folder):
print(f'Enter path to the output folder')
fits_folder = input()
region_folder = f'{fits_folder}\\Region'
region_raw_folder = f'{fits_folder}\\Region_raw'
if not thresh:
print('Enter threshold values for wavelet decomposition:')
print('General threshold:')
_thresh_max = float(input())
print('Additional threshold:')
_thresh_add = float(input())
thresh = (_thresh_max, _thresh_add)
# CREATE ALL NECESSARY FILES AND VARIABLES
obs_list = get_link_list(input_folder, sort_list=True)
start = perf_counter()
group_size = 50
makedirs(region_folder, exist_ok=True)
makedirs(region_raw_folder, exist_ok=True)
# FILTERING BY THE FILE SIZE
print(f'Finished scanning folders. Found {len(obs_list)} observations.')
table = {
'obs_id': [], 'detector': [], 'ra': [], 'dec': [],
'lon': [], 'lat': [], 't_start': [], 'exposure': [],
'count_rate': [], 'remaining_area': [], 'poisson_chi2': [],
'poisson_chi2_full': [], 'rms': []
}
if start_new:
out_table = DataFrame(table)
out_table.to_csv(f'{fits_folder}\\test.csv')
out_table.to_csv(f'{fits_folder}\\test_skipped.csv')
# FILTERING OUT PROCESSED OBSERVATIONS
already_processed_list = read_csv(f'{fits_folder}\\test.csv', index_col=0, dtype={'obs_id':str})
already_skipped_list = read_csv(f'{fits_folder}\\test_skipped.csv', index_col=0, dtype={'obs_id':str})
already_processed = (already_processed_list['obs_id'].astype(str)+already_processed_list['detector']).values
already_skipped = (already_skipped_list['obs_id'].astype(str)+already_skipped_list['detector']).values
obs_list_names = [curr[curr.index('nu')+2:curr.index('_cl.evt')-2] for curr in obs_list]
not_processed = np.array([(curr not in already_processed) for curr in obs_list_names])
not_skipped = np.array([(curr not in already_skipped) for curr in obs_list_names])
obs_list = obs_list[np.logical_and(not_processed, not_skipped)]
print(f'Removed already processed observations. {len(obs_list)} observations remain.')
# START PROCESSING
print('Started processing...')
num = 0
for group_idx in range(len(obs_list)//group_size+1):
print(f'Started group {group_idx}')
group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1), len(obs_list))]
max_size = np.array([stat(file).st_size/2**20 for file in group_list]).max()
process_num = cpu_count() if max_size < 50 else (cpu_count()//2 if max_size < 200 else (cpu_count()//4 if max_size < 1000 else 1))
print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes")
with get_context('spawn').Pool(processes=process_num) as pool:
packed_args = map(lambda _: (_, thresh), group_list)
for result, region, region_raw in pool.imap(process, packed_args):
if type(result) is np.str_:
obs_id = result[result.index('nu'):result.index('_cl.evt')]
print(f'{num:>3} is skipped. File {obs_id}')
num += 1
continue
for key, value in zip(table.keys(), result):
table[key] = [value]
if table['exposure'][0] < 1000:
print(f'{num:>3} {str(result[0])+result[1]} is skipped. Exposure < 1000')
DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv', mode='a', header=False)
num +=1
continue
DataFrame(table).to_csv(f'{fits_folder}\\test.csv', mode='a', header=False)
fits.writeto(f'{region_folder}\\{str(result[0])+result[1]}_region.fits', region, overwrite=True)
if region_raw != -1:
region_raw.writeto(f'{region_raw_folder}\\{str(result[0])+result[1]}_reg_raw.fits', overwrite=True)
print(f'{num:>3} {str(result[0])+result[1]} is written.')
num +=1
print('Converting generated csv to fits file...')
print(f'Current time in: {(perf_counter()-start):.2f}')
print(f'Processed {num/len(obs_list)*100:.2f} percent')
csv_file = read_csv(f'{fits_folder}\\test.csv', index_col=0, dtype={'obs_id': str})
Table.from_pandas(csv_file).write(f'{fits_folder}\\test.fits', overwrite=True)
print(f'Finished writing: {perf_counter()-start}')

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

requirements.txt Normal file

@@ -0,0 +1,5 @@
astropy==5.1
numpy==1.23.2
pandas==1.4.4
scipy==1.9.1
setuptools==57.4.0

setup.py Normal file

@@ -0,0 +1,29 @@
import setuptools
with open("README.md", "r") as fh:
long_description = fh.read()
setuptools.setup(
name="nuwavsource",
version="0.0.4",
author="Andrey Mukhin",
author_email="amukhin@phystech.edu",
description="A package for source exclusion in NuStar observation data using wavelet decomposition",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/Andreyousan/nuwavsource",
packages=setuptools.find_packages(),
include_package_data=True,
classifiers=(
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
),
install_requires = [
'astropy==5.1',
'numpy==1.23.2',
'pandas==1.4.4',
'scipy==1.9.1',
'setuptools==57.4.0',
]
)