diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..33dd012 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Andrey Mukhin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/__pycache__/get_region_pack.cpython-39.pyc b/__pycache__/get_region_pack.cpython-39.pyc new file mode 100644 index 0000000..0f9ec9d Binary files /dev/null and b/__pycache__/get_region_pack.cpython-39.pyc differ diff --git a/get_region_archive_table.py b/get_region_archive_table.py index 5b9a492..abf7f37 100644 --- a/get_region_archive_table.py +++ b/get_region_archive_table.py @@ -1,12 +1,11 @@ # %% from get_region_pack import * import numpy as np -import pandas as pd +from pandas import DataFrame, read_csv from astropy.table import Table from astropy.coordinates import SkyCoord from astropy import units as u -import multiprocessing -from multiprocessing import get_context +from multiprocessing import get_context, cpu_count import warnings import time import os @@ -98,16 +97,27 @@ def process(argument): return obs_name, np.zeros((360,360)) #%% if __name__ == '__main__': - start = time.perf_counter() - processing = False - start_new = False - group_size = 50 - fits_folder = 'D:\Programms\Jupyter\Science\Source_mask\\\Archive\Processing_v8' + #DIALOGUE + print('Enter path to the input folder') + input_folder = input() + obs_list = get_link_list(input_folder,sort_list = True)[:] + print('Create new file for this processing? y/n') + continue_old = input() + if continue_old == 'y': + start_new = True + elif continue_old == 'n': + start_new = False + else: + print('Cannot interprete input, closing script') + raise SystemExit(0) + print(f'Enter path to the output folder') + fits_folder = input() region_folder = f'{fits_folder}\\Region' - if not os.path.exists(fits_folder): - os.makedirs(fits_folder) - os.makedirs(region_folder) - obs_list = get_link_list('E:\\Archive\\0[0-9]\\[0-9]',sort_list = True)[:] + #INIT ALL NECESSARY FILES AND VARIABLES + start = time.perf_counter() + processing = True + group_size = 50 + os.makedirs(region_folder,exist_ok = True) #FILTERING BY THE FILE SIZE print(f'Finished scanning folders. 
Found {len(obs_list)} observations.') table = { @@ -115,14 +125,12 @@ if __name__ == '__main__': 'count_rate':[], 'remaining_area':[], 'poisson_chi2':[], 'poisson_chi2_full':[], 'rms':[] } if start_new: - out_table = pd.DataFrame(table) + out_table = DataFrame(table) out_table.to_csv(f'{fits_folder}\\test.csv') - # out_table.to_csv(f'{fits_folder}\\test_skipped.csv') - os.system(f'copy D:\Programms\Jupyter\Science\Source_mask\Archive\Processing_v3\\test_skipped.csv {fits_folder}') - #REMOVING PROCESSED OBSERVATIONS - # already_processed = fits.getdata(f'{fits_folder}\\test.fits')['obs_name'] - already_processed_list = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str}) - already_skipped_list = pd.read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str}) + out_table.to_csv(f'{fits_folder}\\test_skipped.csv') + #FILTERING OUT PROCESSED OBSERVATIONS + already_processed_list = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str}) + already_skipped_list = read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str}) already_processed = (already_processed_list['obs_id'].astype(str)+already_processed_list['detector']).values already_skipped = (already_skipped_list['obs_id'].astype(str)+already_skipped_list['detector']).values obs_list_names = [curr[curr.index('nu')+2:curr.index('_cl.evt')-2] for curr in obs_list] @@ -130,6 +138,7 @@ if __name__ == '__main__': not_skipped = np.array([(curr not in already_skipped) for curr in obs_list_names]) obs_list = obs_list[np.logical_and(not_processed,not_skipped)] print(f'Removed already processed observations. 
{len(obs_list)} observations remain.') + #START PROCESSING if processing: print('Started processing...') num = 0 @@ -137,7 +146,7 @@ if __name__ == '__main__': print(f'Started group {group_idx}') group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1),len(obs_list))] max_size = np.array([stat(file).st_size/2**20 for file in group_list]).max() - process_num = 10 if max_size<50 else (5 if max_size<200 else (2 if max_size<1000 else 1)) + process_num = cpu_count() if max_size<50 else (cpu_count()//2 if max_size<200 else (cpu_count()//4 if max_size<1000 else 1)) print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes") with get_context('spawn').Pool(processes=process_num) as pool: for result,region in pool.imap(process,enumerate(group_list)): @@ -150,17 +159,17 @@ if __name__ == '__main__': table[key] = [value] if table['exposure'][0] < 1000: print(f'{num:>3} {str(result[0])+result[1]} is skipped. Exposure < 1000') - pd.DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False) + DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False) num +=1 continue - pd.DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False) + DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False) fits.writeto(f'{region_folder}\\{str(result[0])+result[1]}_region.fits', region, overwrite= True) print(f'{num:>3} {str(result[0])+result[1]} is written.') num +=1 print('Converting generated csv to fits file...') - print(f'Current time in: {time.perf_counter()-start}') + print(f'Current time in: {(time.perf_counter()-start):.2f}') print(f'Processed {num/len(obs_list)*100:.2f} percent') - csv_file = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str}) + csv_file = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str}) Table.from_pandas(csv_file).write(f'{fits_folder}\\test.fits',overwrite=True) print(f'Finished writing: 
{time.perf_counter()-start}') # %% diff --git a/get_region_pack.py b/get_region_pack.py index 1df9b70..781a3e5 100644 --- a/get_region_pack.py +++ b/get_region_pack.py @@ -34,10 +34,13 @@ def get_wcs(file): 'NAXIS1': header['TLMAX38'], 'NAXIS2': header['TLMAX39'] }) return wcs -def get_link_list(folder, sort_list=False): +def get_link_list(folder, sort_list=True): links = glob(f'{folder}\\**\\*_cl.evt',recursive=True) - sorted_list = sorted(links, key=lambda x: stat(x).st_size) - return np.array(sorted_list) + if sort_list: + sorted_list = sorted(links, key=lambda x: stat(x).st_size) + return np.array(sorted_list) + else: + return np.array(links) def atrous(level = 0, resize = False, max_size = 1001): base = 1/256*np.array([ [1, 4, 6, 4, 1], @@ -75,21 +78,15 @@ def adjecent(array): output = np.argwhere(output == True) return output[:,0], output[:,1] def add_borders(array,middle=True): - # array_blurred = convolve2d(array,np.ones((5,5)),mode='same') mask = np.zeros(array.shape) datax, datay = np.any(array>0,0), np.any(array>0,1) - # datax, datay = np.any(array_blurred>0,0), np.any(array_blurred>0,1) #Add border masks x_min, y_min = np.argmax(datax), np.argmax(datay) x_max, y_max = len(datax) - np.argmax(datax[::-1]), len(datay) - np.argmax(datay[::-1]) - # x_mid_min, y_mid_min = x_min+10+np.argmin(datax[x_min+10:]), y_min+10+np.argmin(datay[y_min+10:]) - # x_mid_max, y_mid_max = x_max-10-np.argmin(datax[x_max-11::-1]), y_max-10-np.argmin(datay[y_max-11::-1]) mask[y_min:y_max,x_min:x_max] = True if middle is True: mask[176:191,:] = False mask[:,176:191] = False - # mask[y_mid_min:y_mid_max,:] = False - # mask[:,x_mid_min:x_mid_max] = False mask = np.logical_not(mask) return mask def fill_poisson(array, size_input=32): @@ -101,7 +98,6 @@ def fill_poisson(array, size_input=32): mask = array.mask.copy() while mask.sum()>1: kernel = np.ones((size,size))/size**2 - # coeff = fftconvolve(np.logical_not(mask), kernel, mode='same') coeff = 
fftconvolve(np.logical_not(mask),kernel,mode='same') mean = fftconvolve(output,kernel,mode='same') idx = np.where(np.logical_and(mask,coeff>0.1))