This commit is contained in:
Andrey Mukhin 2022-09-05 13:17:27 +03:00
parent 358a1ed7da
commit dc92cf59c3
2 changed files with 39 additions and 34 deletions

View File

@ -1,12 +1,11 @@
# %%
from get_region_pack import *
import numpy as np
import pandas as pd
from pandas import DataFrame, read_csv
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
import multiprocessing
from multiprocessing import get_context
from multiprocessing import get_context, cpu_count
import warnings
import time
import os
@ -98,16 +97,27 @@ def process(argument):
return obs_name, np.zeros((360,360))
#%%
if __name__ == '__main__':
start = time.perf_counter()
processing = False
start_new = False
group_size = 50
fits_folder = 'D:\Programms\Jupyter\Science\Source_mask\\\Archive\Processing_v8'
#DIALOGUE
print('Enter path to the input folder')
input_folder = input()
obs_list = get_link_list(input_folder,sort_list = True)[:]
print('Create new file for this processing? y/n')
continue_old = input()
if continue_old == 'y':
start_new = True
elif continue_old == 'n':
start_new = False
else:
print('Cannot interprete input, closing script')
raise SystemExit(0)
print(f'Enter path to the output folder')
fits_folder = input()
region_folder = f'{fits_folder}\\Region'
if not os.path.exists(fits_folder):
os.makedirs(fits_folder)
os.makedirs(region_folder)
obs_list = get_link_list('E:\\Archive\\0[0-9]\\[0-9]',sort_list = True)[:]
#INIT ALL NECESSARY FILES AND VARIABLES
start = time.perf_counter()
processing = True
group_size = 50
os.makedirs(region_folder,exist_ok = True)
#FILTERING BY THE FILE SIZE
print(f'Finished scanning folders. Found {len(obs_list)} observations.')
table = {
@ -115,14 +125,12 @@ if __name__ == '__main__':
'count_rate':[], 'remaining_area':[], 'poisson_chi2':[], 'poisson_chi2_full':[], 'rms':[]
}
if start_new:
out_table = pd.DataFrame(table)
out_table = DataFrame(table)
out_table.to_csv(f'{fits_folder}\\test.csv')
# out_table.to_csv(f'{fits_folder}\\test_skipped.csv')
os.system(f'copy D:\Programms\Jupyter\Science\Source_mask\Archive\Processing_v3\\test_skipped.csv {fits_folder}')
#REMOVING PROCESSED OBSERVATIONS
# already_processed = fits.getdata(f'{fits_folder}\\test.fits')['obs_name']
already_processed_list = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
already_skipped_list = pd.read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str})
out_table.to_csv(f'{fits_folder}\\test_skipped.csv')
#FILTERING OUT PROCESSED OBSERVATIONS
already_processed_list = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
already_skipped_list = read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str})
already_processed = (already_processed_list['obs_id'].astype(str)+already_processed_list['detector']).values
already_skipped = (already_skipped_list['obs_id'].astype(str)+already_skipped_list['detector']).values
obs_list_names = [curr[curr.index('nu')+2:curr.index('_cl.evt')-2] for curr in obs_list]
@ -130,6 +138,7 @@ if __name__ == '__main__':
not_skipped = np.array([(curr not in already_skipped) for curr in obs_list_names])
obs_list = obs_list[np.logical_and(not_processed,not_skipped)]
print(f'Removed already processed observations. {len(obs_list)} observations remain.')
#START PROCESSING
if processing:
print('Started processing...')
num = 0
@ -137,7 +146,7 @@ if __name__ == '__main__':
print(f'Started group {group_idx}')
group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1),len(obs_list))]
max_size = np.array([stat(file).st_size/2**20 for file in group_list]).max()
process_num = 10 if max_size<50 else (5 if max_size<200 else (2 if max_size<1000 else 1))
process_num = cpu_count() if max_size<50 else (cpu_count()//2 if max_size<200 else (cpu_count()//4 if max_size<1000 else 1))
print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes")
with get_context('spawn').Pool(processes=process_num) as pool:
for result,region in pool.imap(process,enumerate(group_list)):
@ -150,17 +159,17 @@ if __name__ == '__main__':
table[key] = [value]
if table['exposure'][0] < 1000:
print(f'{num:>3} {str(result[0])+result[1]} is skipped. Exposure < 1000')
pd.DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False)
DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False)
num +=1
continue
pd.DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False)
DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False)
fits.writeto(f'{region_folder}\\{str(result[0])+result[1]}_region.fits', region, overwrite= True)
print(f'{num:>3} {str(result[0])+result[1]} is written.')
num +=1
print('Converting generated csv to fits file...')
print(f'Current time in: {time.perf_counter()-start}')
print(f'Current time in: {(time.perf_counter()-start):.2f}')
print(f'Processed {num/len(obs_list)*100:.2f} percent')
csv_file = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
csv_file = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
Table.from_pandas(csv_file).write(f'{fits_folder}\\test.fits',overwrite=True)
print(f'Finished writing: {time.perf_counter()-start}')
# %%

View File

@ -34,10 +34,13 @@ def get_wcs(file):
'NAXIS1': header['TLMAX38'], 'NAXIS2': header['TLMAX39']
})
return wcs
def get_link_list(folder, sort_list=True):
    """Recursively collect cleaned event files (*_cl.evt) under *folder*.

    Parameters
    ----------
    folder : str
        Root directory to scan; the '**' pattern makes the search
        recursive over all subdirectories (Windows-style separators,
        matching the rest of this script).
    sort_list : bool, optional
        If True (default), return the paths ordered by file size on
        disk, smallest first, so cheaper observations are handled before
        large ones.

    Returns
    -------
    numpy.ndarray
        Array of file path strings (possibly empty).
    """
    links = glob(f'{folder}\\**\\*_cl.evt',recursive=True)
    if sort_list:
        # Ascending sort by size: small (fast) files first.
        sorted_list = sorted(links, key=lambda x: stat(x).st_size)
        return np.array(sorted_list)
    else:
        return np.array(links)
def atrous(level = 0, resize = False, max_size = 1001):
base = 1/256*np.array([
[1, 4, 6, 4, 1],
@ -75,21 +78,15 @@ def adjecent(array):
output = np.argwhere(output == True)
return output[:,0], output[:,1]
def add_borders(array,middle=True):
    """Build a boolean exclusion mask around the data footprint of *array*.

    The bounding box of all strictly positive pixels is located; pixels
    inside that box are kept, everything outside is masked.  When
    ``middle`` is True, rows and columns 176:191 are additionally masked
    (presumably the central detector gap of the 360x360 images produced
    elsewhere in this script — TODO confirm).

    Parameters
    ----------
    array : numpy.ndarray
        2-D image; pixels > 0 define the footprint.
    middle : bool, optional
        Also mask the fixed central cross at indices 176:191.

    Returns
    -------
    numpy.ndarray
        Boolean array of the same shape, True for pixels to EXCLUDE
        (suitable as a numpy masked-array mask).
    """
    mask = np.zeros(array.shape)
    # Which columns (datax) / rows (datay) contain any positive pixel.
    datax, datay = np.any(array>0,0), np.any(array>0,1)
    #Add border masks
    # argmax on a boolean vector gives the first True index; the
    # reversed argmax gives the distance from the end to the last True.
    x_min, y_min = np.argmax(datax), np.argmax(datay)
    x_max, y_max = len(datax) - np.argmax(datax[::-1]), len(datay) - np.argmax(datay[::-1])
    mask[y_min:y_max,x_min:x_max] = True
    if middle is True:
        mask[176:191,:] = False
        mask[:,176:191] = False
    # Invert: the "good" box becomes False, everything else True.
    mask = np.logical_not(mask)
    return mask
def fill_poisson(array, size_input=32):
@ -101,7 +98,6 @@ def fill_poisson(array, size_input=32):
mask = array.mask.copy()
while mask.sum()>1:
kernel = np.ones((size,size))/size**2
# coeff = fftconvolve(np.logical_not(mask), kernel, mode='same')
coeff = fftconvolve(np.logical_not(mask),kernel,mode='same')
mean = fftconvolve(output,kernel,mode='same')
idx = np.where(np.logical_and(mask,coeff>0.1))