From dc92cf59c3663e5b6c4c9ac009cecb5381fc4170 Mon Sep 17 00:00:00 2001 From: Andrey Mukhin Date: Mon, 5 Sep 2022 13:17:27 +0300 Subject: [PATCH 1/3] commit --- get_region_archive_table.py | 57 +++++++++++++++++++++---------------- get_region_pack.py | 16 ++++------- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/get_region_archive_table.py b/get_region_archive_table.py index 5b9a492..abf7f37 100644 --- a/get_region_archive_table.py +++ b/get_region_archive_table.py @@ -1,12 +1,11 @@ # %% from get_region_pack import * import numpy as np -import pandas as pd +from pandas import DataFrame, read_csv from astropy.table import Table from astropy.coordinates import SkyCoord from astropy import units as u -import multiprocessing -from multiprocessing import get_context +from multiprocessing import get_context, cpu_count import warnings import time import os @@ -98,16 +97,27 @@ def process(argument): return obs_name, np.zeros((360,360)) #%% if __name__ == '__main__': - start = time.perf_counter() - processing = False - start_new = False - group_size = 50 - fits_folder = 'D:\Programms\Jupyter\Science\Source_mask\\\Archive\Processing_v8' + #DIALOGUE + print('Enter path to the input folder') + input_folder = input() + obs_list = get_link_list(input_folder,sort_list = True)[:] + print('Create new file for this processing? y/n') + continue_old = input() + if continue_old == 'y': + start_new = True + elif continue_old == 'n': + start_new = False + else: + print('Cannot interprete input, closing script') + raise SystemExit(0) + print(f'Enter path to the output folder') + fits_folder = input() region_folder = f'{fits_folder}\\Region' - if not os.path.exists(fits_folder): - os.makedirs(fits_folder) - os.makedirs(region_folder) - obs_list = get_link_list('E:\\Archive\\0[0-9]\\[0-9]',sort_list = True)[:] + #INIT ALL NECESSARY FILES AND VARIBALES + start = time.perf_counter() + processing = True + group_size = 50 + os.makedirs(region_folder,exist_ok = True) #FILTERING BY THE FILE SIZE print(f'Finished scanning folders. Found {len(obs_list)} observations.') table = { @@ -115,14 +125,12 @@ if __name__ == '__main__': 'count_rate':[], 'remaining_area':[], 'poisson_chi2':[], 'poisson_chi2_full':[], 'rms':[] } if start_new: - out_table = pd.DataFrame(table) + out_table = DataFrame(table) out_table.to_csv(f'{fits_folder}\\test.csv') - # out_table.to_csv(f'{fits_folder}\\test_skipped.csv') - os.system(f'copy D:\Programms\Jupyter\Science\Source_mask\Archive\Processing_v3\\test_skipped.csv {fits_folder}') - #REMOVING PROCESSED OBSERVATIONS - # already_processed = fits.getdata(f'{fits_folder}\\test.fits')['obs_name'] - already_processed_list = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str}) - already_skipped_list = pd.read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str}) + out_table.to_csv(f'{fits_folder}\\test_skipped.csv') + #FILTERING OUT PROCESSED OBSERVATIONS + already_processed_list = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str}) + already_skipped_list = read_csv(f'{fits_folder}\\test_skipped.csv',index_col=0,dtype={'obs_id':str}) already_processed = (already_processed_list['obs_id'].astype(str)+already_processed_list['detector']).values already_skipped = (already_skipped_list['obs_id'].astype(str)+already_skipped_list['detector']).values obs_list_names = [curr[curr.index('nu')+2:curr.index('_cl.evt')-2] for curr in obs_list] @@ -130,6 +138,7 @@ if __name__ == '__main__': not_skipped = np.array([(curr not in already_skipped) for curr in obs_list_names]) obs_list = obs_list[np.logical_and(not_processed,not_skipped)] print(f'Removed already processed observations. {len(obs_list)} observations remain.') + #START PROCESSING if processing: print('Started processing...') num = 0 @@ -137,7 +146,7 @@ if __name__ == '__main__': print(f'Started group {group_idx}') group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1),len(obs_list))] max_size = np.array([stat(file).st_size/2**20 for file in group_list]).max() - process_num = 10 if max_size<50 else (5 if max_size<200 else (2 if max_size<1000 else 1)) + process_num = cpu_count() if max_size<50 else (cpu_count()//2 if max_size<200 else (cpu_count()//4 if max_size<1000 else 1)) print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes") with get_context('spawn').Pool(processes=process_num) as pool: for result,region in pool.imap(process,enumerate(group_list)): @@ -150,17 +159,17 @@ if __name__ == '__main__': table[key] = [value] if table['exposure'][0] < 1000: print(f'{num:>3} {str(result[0])+result[1]} is skipped. Exposure < 1000') - pd.DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False) + DataFrame(table).to_csv(f'{fits_folder}\\test_skipped.csv',mode='a',header=False) num +=1 continue - pd.DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False) + DataFrame(table).to_csv(f'{fits_folder}\\test.csv',mode='a',header=False) fits.writeto(f'{region_folder}\\{str(result[0])+result[1]}_region.fits', region, overwrite= True) print(f'{num:>3} {str(result[0])+result[1]} is written.') num +=1 print('Converting generated csv to fits file...') - print(f'Current time in: {time.perf_counter()-start}') + print(f'Current time in: {(time.perf_counter()-start):.2f}') print(f'Processed {num/len(obs_list)*100:.2f} percent') - csv_file = pd.read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str}) + csv_file = read_csv(f'{fits_folder}\\test.csv',index_col=0,dtype={'obs_id':str}) Table.from_pandas(csv_file).write(f'{fits_folder}\\test.fits',overwrite=True) print(f'Finished writing: {time.perf_counter()-start}') # %% diff --git a/get_region_pack.py b/get_region_pack.py index 1df9b70..781a3e5 100644 --- a/get_region_pack.py +++ b/get_region_pack.py @@ -34,10 +34,13 @@ def get_wcs(file): 'NAXIS1': header['TLMAX38'], 'NAXIS2': header['TLMAX39'] }) return wcs -def get_link_list(folder, sort_list=False): +def get_link_list(folder, sort_list=True): links = glob(f'{folder}\\**\\*_cl.evt',recursive=True) - sorted_list = sorted(links, key=lambda x: stat(x).st_size) - return np.array(sorted_list) + if sort_list: + sorted_list = sorted(links, key=lambda x: stat(x).st_size) + return np.array(sorted_list) + else: + return np.array(links) def atrous(level = 0, resize = False, max_size = 1001): base = 1/256*np.array([ [1, 4, 6, 4, 1], @@ -75,21 +78,15 @@ def adjecent(array): output = np.argwhere(output == True) return output[:,0], output[:,1] def add_borders(array,middle=True): - # array_blurred = convolve2d(array,np.ones((5,5)),mode='same') mask = np.zeros(array.shape) datax, datay = np.any(array>0,0), np.any(array>0,1) - # datax, datay = np.any(array_blurred>0,0), np.any(array_blurred>0,1) #Add border masks x_min, y_min = np.argmax(datax), np.argmax(datay) x_max, y_max = len(datax) - np.argmax(datax[::-1]), len(datay) - np.argmax(datay[::-1]) - # x_mid_min, y_mid_min = x_min+10+np.argmin(datax[x_min+10:]), y_min+10+np.argmin(datay[y_min+10:]) - # x_mid_max, y_mid_max = x_max-10-np.argmin(datax[x_max-11::-1]), y_max-10-np.argmin(datay[y_max-11::-1]) mask[y_min:y_max,x_min:x_max] = True if middle is True: mask[176:191,:] = False mask[:,176:191] = False - # mask[y_mid_min:y_mid_max,:] = False - # mask[:,x_mid_min:x_mid_max] = False mask = np.logical_not(mask) return mask def fill_poisson(array, size_input=32): @@ -101,7 +98,6 @@ def fill_poisson(array, size_input=32): mask = array.mask.copy() while mask.sum()>1: kernel = np.ones((size,size))/size**2 - # coeff = fftconvolve(np.logical_not(mask), kernel, mode='same') coeff = fftconvolve(np.logical_not(mask),kernel,mode='same') mean = fftconvolve(output,kernel,mode='same') idx = np.where(np.logical_and(mask,coeff>0.1)) From 5093159bfe9aadc607625b2e0e54f6ab1608b21b Mon Sep 17 00:00:00 2001 From: Andrey Mukhin Date: Mon, 5 Sep 2022 13:28:56 +0300 Subject: [PATCH 2/3] commit --- __pycache__/get_region_pack.cpython-39.pyc | Bin 0 -> 9175 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 __pycache__/get_region_pack.cpython-39.pyc diff --git a/__pycache__/get_region_pack.cpython-39.pyc b/__pycache__/get_region_pack.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0f9ec9da82761644751dd44460fa915488992520 GIT binary patch literal 9175 zcmc&)U2GiJb)Gx_JG)CR$rb-(JBlOAUNu%Exs87!n35&ZQEJhuNZZns?smC%cD3aG z>CTWQnPKacR+FNzYZNH@<2v95Z4l?7D2n!>eQ6Q&p%48r^40;0w#Y+56ll`+A*hSO z{mz*sm!#da=u4NHGv}Ur=g!ReKW9|Q=RFO-^`9?Pzj;>EzC(keRPs>yClfcI1!}3#DHSxSAPs>N%(BvaKx<4t!f>wHb4{xK*mqcQQQ6(zrNw!vt zs-5l8RUF|>04bzi*XttIirTh_ zv|S;MSR2z~t)@qMPv6!fBQjU4SiGg}h-=!b(@Xlcv28}$iXEGgF3mUfSdWcM+P(9! zj_0-J72R-;fe!3FkytX?>;@$#(c9g`+6q)BOw8p@rqr|ExRoN_AiGgLlyIxU^N;?V@qq7zzMni5SRx?-+n(t|90=N3$vV3Ogrs~vH zrPT_TzOdHa#GXqF)ke^+21~D1*2B)4ss`ou8hELe>OoXiLA}vwm%Ekf>bdS_l3Q-H zD{8aMHy*;CAuF925=AkNLrsB}7z?YLReHJoAL*WpgZbJ8LY$1jUE*L29)%6tC^`0s1DQ!W+dI#&9{q-)d&tScWl^(AL9IThPrj(sm^Z;ra zAQ5*GaeME3cq$f#Z(Iw&%xklY7w)8|+k1B*ppvOb84?GKkQHzwN2BcBMi?;ZilFq_mJ_ac42L=c+7V48juC&EV?PUo zxHYA%kQNH!7~UGX$Pl~&ZZQOhh(8R8GeUwhfF8pv!yLmrI5za#xg7xskY*029P*vT ztBZGTKJnB(@-!hY&0kL`gfQjW3yUd*5T@L^c{`;Ll2TrNp-%xqloxJXN!Kv)bSdZC zbhFtD{qsefafd}3)0xKCW?$ejooRgQ=Jne&PiG##eB}im4!`Nmn$PIAK%{8(RVP5gW(f(Y@5Hw3cP%S>nRK z;n21|L9g9xv{wPb=-j1dr&?)-&jLwTVjp8Q*n~wmw7ng6R1`=y2sH{y$@J}7rwKkM z8Jgp5e2aDr{cO6LX~&->f8@rGV5^V>rHN5d5b6nFdwZ~jD%r#Sj}~NbFt3Ne5Xqjo zju`ee0cbEZ58*_&dICP&kQV!IV+DQ%7T`?6vrA_}BcBcbW5#Ap?}=?IHew5r5MGGZ z-!~59HT@E7O1%XyZ(ICm} zU;ntdAALm)9s$15f*NGz>nW?cGbM}N-X5vu9`Q~61Oak|m?C_yK!CiIdXfOqF!fo& zp90uBy^p0dVmy1zbQ9rcx)oV|ts$ekxQsj`D@0<2cPrgM5L6INp)O;E zdJy1<+R#gr_OjZ%4kUu&NFBm6NO({KoMU|lrb*N>sDuO-i50y_!qRz-Ii{V|NXU`c zHE@_QOgJ{A0RijN8o@Cw3%Gy9NsSfH#y-}r9peu{&xsuf%9`J!0Hc_v^myOJf!{yO zAQ>=PjdmJw^lrm9f{m_kS5&244}7<^){Gk6=B96fUx6yQ%v<$2T&pe-cox97!bZJS zNyHWPIMK+P9cQdnudIdP)0hcKbefn(G@2F{!A~`Z@e$WUHh5rn-@t&PIP6WZrKPq< zI2eK`1r{Ri7|4KNbF0?2zIBEyjIB%uG-EKP*T+_1os5y-K!<_(j%vAGSqSa4>RdRMXiaN=6$0iOZ?x_6g)qnE(w$6n{@BR`};Y zUOc4d?m)E_@gd1JJM{*fWVzCogF(9!ArGj8tI5!nC{m4Oh-X%*j`H1#s=szOPywEV zJf?5fRYR&7x;-UlDd23ut;m(28nmO!poc`#pxdnR|7lyJ2uEiSjwVS)H!!z&BoL8+ z=p|Xp-v%uR|lGvO1h2J3-cH%)u4*1gO@w9l;&$kdwt6J3`|= zQjeaP)G%ZAXUwBBxYF5nrHS;{j$ODkD|YVdsq2*Hnx?)5$4SW@sHym9j%ledd4_1X zuO0UV4Pm-z3e7^>`w+jyxxL@Vi|+j?K*>o3MSe#;1CSV%_9j^lOkpE2kgKUL(7H_O zrtDTjN+bk737T(Fmba0fU|Snyn4oWMrYDSO4x`FPB5JlGWqG-y5dT6FNt!w6Iq|SD zE}k)pFsh;`8lF&#nB6-C@DZUMxTZhHECqFPJS)bI9!^IP*NzaNQT7Xc*T|LhATVGK zV-VJ^+k@L#HPxTNEoH!VFdME3*+=TKA`kQ|2^BF5dB~dv1nY5}OGggob2##df;Mc+ z>=DylJjc-PFzAKYi7BUqIH%wQVX<(0u?wL&xB_Ro*VEdze(z#zu%JH3&v#CdIh?9> z*4i>PLH2gf<0aMS0qVc?oAmqRnYq$P;x)oX8?^_@k-qDL|H|IL27$anwN7GnRih0X zBS|G@wbR{X(IrN>)=JDyI|$i4S;mRQ2B)s$HogtlDLXCS>2?}n*l9yAmcxB5@F@6{ z8*K_b~>r{3@X$iz&grrj;aF%>H`~|gNGuaA5n>ij;MsTYe^E75h2RJF={Jr4+Sa8 z(I(1K6VXbGG8BacfHVHM5iLuq_DiUg6+sFryhh-O|BrxA<6S`&4T@c=;M~8dp3Z?h4h!ijPF(1HbVt%tEkm?^9eQ?ow)# z_9&35Kp3bS??Y_t6BvhUO=pZ)ApJ239;`S*B~v5zF<_TL7*b4f>MSeYj8MvZphMpPshH9MSpi6};N80F8!B|LFVNUD2^P)dyXX87nj38tydy+Sol5nC`CJxvr>9^;0c~$PY`Qm_=8xW z6$J-OX%8%si_9JbUPe1=7wxE?_?53BJRto-(*SK0xNWWM9E_<=pq1T&F*OXda^_%6 zZeA-RIHcDDx1m}#H50g%pp~;xj&cIbHefjd%lh=M! zPi-M>s8I-9x`Fmk^D{DtE|hEC)>h_Rbz}J)btL!x5PLPg{wVaC75x;3zB_yS=8Fq2 z-I^^;t51PIjSFO&jtaWXN)`1I^_nW`Dv2S3YT`wWR#2vP z9y)@!H@7ZS32O)bF3HjJ1u|_BQslX1MBqkLF8?Cdgl7P# z%zFUFS?K=}<{@~V2dyXyAL?TKLtEb}9KtB{J$Vz-i6|GU%nmwxT)_h;(9{M_Ha z_~t*&)c@v(3*Y(Ocm8#z{#SoVzaPwC)i?j;+c%aAKbWaw?JqaKi{&-`f%4_?zy0eU zE_{C`RRM)v2Vn|c?~6n@sIGme0oT`5PIu8-gx7nWR#XA}d|G15GN&xZ{a=Q}%QL@@ z#r>nd?8WdY@F*CgZ-TI-)HJyEdJi}|G4!#Gn|?FHcWo; zr3GfZuiw0WM2Fyf`PI4cMrzEj&e4|QEG0?M6$AZehTMO~Yo`&7nilF41Wpm4y%6A7 zo(~f)(uCn?N~z9a>T9(AGXPs>j*EwE^kZ6e-at@`iY+|J zlE9bI%IP*XO8Hcq=q=SR0QmOJ>s$r#8MzPb>$~{Ez&^qE2uoWxewH3J& zUTZXEnY!nL^!k``a#Vf?d-wC8apcOQ;xzIf%7^H$fbmg--}hEW3MvnK(7^`(&p=36 z6fY@{rfA98GP*YW(dJJ$;y_C&Y9QiR?;@*}_5?H$+KWI=Q3m?#P>Ds_yCR}YY@zP; zxaH?WSrkmvXvYVLDdf?}ywAfD-0ckRmWv(0JoKAZxLkMBh+6WQ9(qfp?f1HUeMegZ ztA}DoQnTKXXx3BOAoEZxhOAgaQyOK~_a2Cb>GKkOVndr{Xw~dD^GB!?d*D`9YTay*V_!n(oPcHky?Fk@dHlPue*XNE9KO=3u0}T?miwlvytct`)6d+R zoh$Q)l>vHtU<2xTy6W=)r9!H;T(|!W2#LkLE{Dl&wWb+5A`Wrge{yJ1kphfZkRq;}sM8}`XxONdm zAkFER!}r9rxQmP(B!qB|wuB;a`sZ*f6Uw~ekn1J21_qa0qu!j`A7cgz$|JZ?Bzx*B zaHnvYJtSA#9?C?I>E3P@YEKEDL+0|fc=dfa*gV$SF}@#BWJE84#`#HYdkCgut{QLa z>Z$&`7=uEiH{qbAI*jXJEQhWu!0ZY!;HrjB0;R9R7`ZS$fp+XLo@6A(p7BUgLy*Xz zipfxwGQcS1+a9GL!*?CUYU_VkJr)(>QM_9bcN>Yv;v#dY$Q;6MppM3)$Eh4fPQ)j6 z$57ttYJdI&_{&$uz-s#TxXj?%ao%w}-4Qj>|M`Azi+f)MhlU34JHY78(`W4oR8Rly z8V_zK&Vjq=(!})jk73I3IKS=P+l}*sT?ZH^({AiwZx?aQo9Eh4&%g%*1F9A-%|0-9iNsCSv#9FS9R14QI!306{a|D{Splg z0u;fJTi#O<~JEUmdGQd2*TRlXhFr35L-bgI=f*YUlS#7(??99=u#!DsKF8AN^t zHGP>sm-{aLuK@~SX%$Hi=(`-rmRIYk_dx5Zz3#j8v6mM5272wjf$7A+w?=&OYSi0} z8XCmy$RDQe;9-J+2CttdDt3_#s{ioFoiZrL+)a|f;~|glw7P$UrwFeBX!--9sC&Xh zV9eo6?EzP(K!*c>!$m8fnh4Z^{H%UPe;Al8P94kj4(>6JJn}N`FgI6n z6XfHRy_d_0S1u#VUqd=yPIBck`Pg(uy@AE4#HgNDJ8!*h)e2NyrPPoU4iHkR-=PX3<^{9QgRnVz7D z%LL{K+#o=u5x4NTGm|PUz2wKeed1n9dH5_`ycUw0lGt*30d3+O&aUm*Zq_rsg6Ft- LZ`2#{@~-<|$n^OM literal 0 HcmV?d00001 From 11969fe13a5ccc9fa2b04e45b9aa802bce5b0bd4 Mon Sep 17 00:00:00 2001 From: Andrey Mukhin Date: Mon, 5 Sep 2022 13:38:23 +0300 Subject: [PATCH 3/3] Create LICENSE.md --- LICENSE.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE.md diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..33dd012 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Andrey Mukhin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.