Merge pull request #4 from Andreyousan/code_trimming

Code trimming
This commit is contained in:
Андрей Мухин 2022-09-05 13:43:18 +03:00 committed by GitHub
commit 5b4f898901
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 34 deletions

21
LICENSE.md Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Andrey Mukhin
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Binary file not shown.

View File

@ -1,12 +1,11 @@
# %%
from get_region_pack import *
import numpy as np
import pandas as pd
from pandas import DataFrame, read_csv
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
import multiprocessing
from multiprocessing import get_context
from multiprocessing import get_context, cpu_count
import warnings
import time
import os
@ -98,16 +97,27 @@ def process(argument):
return obs_name, np.zeros((360,360))
#%%
if __name__ == '__main__':
start = time.perf_counter()
processing = False
start_new = False
group_size = 50
fits_folder = 'D:\Programms\Jupyter\Science\Source_mask\\\Archive\Processing_v8'
#DIALOGUE
print('Enter path to the input folder')
input_folder = input()
obs_list = get_link_list(input_folder,sort_list = True)[:]
print('Create new file for this processing? y/n')
continue_old = input()
if continue_old == 'y':
start_new = True
elif continue_old == 'n':
start_new = False
else:
print('Cannot interprete input, closing script')
raise SystemExit(0)
print(f'Enter path to the output folder')
fits_folder = input()
region_folder = f'{fits_folder}\\Region'
if not os.path.exists(fits_folder):
os.makedirs(fits_folder)
os.makedirs(region_folder)
obs_list = get_link_list('E:\\Archive\\0[0-9]\\[0-9]',sort_list = True)[:]
#INIT ALL NECESSARY FILES AND VARIABLES
start = time.perf_counter()
processing = True
group_size = 50
os.makedirs(region_folder,exist_ok = True)
#FILTERING BY THE FILE SIZE
print(f'Finished scanning folders. Found {len(obs_list)} observations.')
table = {
@ -115,14 +125,12 @@ if __name__ == '__main__':
'count_rate':[], 'remaining_area':[], 'poisson_chi2':[], 'poisson_chi2_full':[], 'rms':[]
}
if start_new:
out_table = pd.DataFrame(table)
out_table = DataFrame(table)
out_table.to_csv(f'{fits_folder}\\test.csv')
# out_table.to_csv(f'{fits_folder}\\test_skipped.csv')
os.system(f'copy D:\Programms\Jupyter\Science\Source_mask\Archive\Processing_v3\\test_skipped.csv {fits_folder}')
#REMOVING PROCESSED OBSERVATIONS
# already_processed = fits.getdata(f'{fits_folder}\\test.fits')['obs_name']
already_processed_list = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
already_skipped_list = pd.read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str})
out_table.to_csv(f'{fits_folder}\\test_skipped.csv')
#FILTERING OUT PROCESSED OBSERVATIONS
already_processed_list = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
already_skipped_list = read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str})
already_processed = (already_processed_list['obs_id'].astype(str)+already_processed_list['detector']).values
already_skipped = (already_skipped_list['obs_id'].astype(str)+already_skipped_list['detector']).values
obs_list_names = [curr[curr.index('nu')+2:curr.index('_cl.evt')-2] for curr in obs_list]
@ -130,6 +138,7 @@ if __name__ == '__main__':
not_skipped = np.array([(curr not in already_skipped) for curr in obs_list_names])
obs_list = obs_list[np.logical_and(not_processed,not_skipped)]
print(f'Removed already processed observations. {len(obs_list)} observations remain.')
#START PROCESSING
if processing:
print('Started processing...')
num = 0
@ -137,7 +146,7 @@ if __name__ == '__main__':
print(f'Started group {group_idx}')
group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1),len(obs_list))]
max_size = np.array([stat(file).st_size/2**20 for file in group_list]).max()
process_num = 10 if max_size<50 else (5 if max_size<200 else (2 if max_size<1000 else 1))
process_num = cpu_count() if max_size<50 else (cpu_count()//2 if max_size<200 else (cpu_count()//4 if max_size<1000 else 1))
print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes")
with get_context('spawn').Pool(processes=process_num) as pool:
for result,region in pool.imap(process,enumerate(group_list)):
@ -150,17 +159,17 @@ if __name__ == '__main__':
table[key] = [value]
if table['exposure'][0] < 1000:
print(f'{num:>3} {str(result[0])+result[1]} is skipped. Exposure < 1000')
pd.DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False)
DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False)
num +=1
continue
pd.DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False)
DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False)
fits.writeto(f'{region_folder}\\{str(result[0])+result[1]}_region.fits', region, overwrite= True)
print(f'{num:>3} {str(result[0])+result[1]} is written.')
num +=1
print('Converting generated csv to fits file...')
print(f'Current time in: {time.perf_counter()-start}')
print(f'Current time in: {(time.perf_counter()-start):.2f}')
print(f'Processed {num/len(obs_list)*100:.2f} percent')
csv_file = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
csv_file = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str})
Table.from_pandas(csv_file).write(f'{fits_folder}\\test.fits',overwrite=True)
print(f'Finished writing: {time.perf_counter()-start}')
# %%

View File

@ -34,10 +34,13 @@ def get_wcs(file):
'NAXIS1': header['TLMAX38'], 'NAXIS2': header['TLMAX39']
})
return wcs
def get_link_list(folder, sort_list=True):
    """Return an array of paths to all '*_cl.evt' files found recursively under `folder`.

    Parameters
    ----------
    folder : str
        Root directory to scan. The glob pattern uses Windows-style '\\'
        separators, matching the rest of this codebase.
    sort_list : bool, optional
        If True (default), sort the paths by file size, smallest first.

    Returns
    -------
    numpy.ndarray
        Array of matching file-path strings (empty if nothing matched).
    """
    links = glob(f'{folder}\\**\\*_cl.evt', recursive=True)
    if sort_list:
        # Sorting by size lets the caller batch observations of similar
        # workload together (see the group-size logic in the driver script).
        return np.array(sorted(links, key=lambda x: stat(x).st_size))
    else:
        return np.array(links)
def atrous(level = 0, resize = False, max_size = 1001):
base = 1/256*np.array([
[1, 4, 6, 4, 1],
@ -75,21 +78,15 @@ def adjecent(array):
output = np.argwhere(output == True)
return output[:,0], output[:,1]
def add_borders(array, middle=True):
    """Build a boolean exclusion mask for the borders of a 2-D image.

    Finds the bounding box of all strictly positive pixels in `array`,
    marks the box interior as valid, optionally invalidates a fixed
    central cross, and finally inverts the mask so that True marks
    pixels to EXCLUDE (everything outside the data footprint).

    Parameters
    ----------
    array : numpy.ndarray
        2-D image; pixels > 0 define the data footprint.
    middle : bool, optional
        If True (default), also exclude the fixed band at indices
        176:191 along both axes.
        # NOTE(review): 176:191 appears to be the central detector gap
        # of the 360x360 frames produced upstream — confirm before reuse
        # on images of a different size.

    Returns
    -------
    numpy.ndarray
        Boolean mask with the same shape as `array`; True = excluded.
    """
    mask = np.zeros(array.shape)
    # Which columns (datax) / rows (datay) contain at least one positive pixel.
    datax, datay = np.any(array > 0, 0), np.any(array > 0, 1)
    # Bounding box of the footprint: argmax finds the first True from each end.
    x_min, y_min = np.argmax(datax), np.argmax(datay)
    x_max, y_max = len(datax) - np.argmax(datax[::-1]), len(datay) - np.argmax(datay[::-1])
    mask[y_min:y_max, x_min:x_max] = True
    if middle is True:
        # Carve out the hard-coded central cross (no-op on small test images,
        # since out-of-range slices are simply empty).
        mask[176:191, :] = False
        mask[:, 176:191] = False
    # Invert: True now means "outside the usable area".
    mask = np.logical_not(mask)
    return mask
def fill_poisson(array, size_input=32):
@ -101,7 +98,6 @@ def fill_poisson(array, size_input=32):
mask = array.mask.copy()
while mask.sum()>1:
kernel = np.ones((size,size))/size**2
# coeff = fftconvolve(np.logical_not(mask), kernel, mode='same')
coeff = fftconvolve(np.logical_not(mask),kernel,mode='same')
mean = fftconvolve(output,kernel,mode='same')
idx = np.where(np.logical_and(mask,coeff>0.1))