From 71bed5545467582216a3775fb925381d0b471d86 Mon Sep 17 00:00:00 2001 From: Andrey Mukhin Date: Wed, 5 Jul 2023 15:19:46 +0300 Subject: [PATCH] Added examples --- README.md | 66 ++++++++++++++++++++---------- examples/1_save_results.py | 54 ++++++++++++++++++++++++ examples/2_directory_processing.py | 33 +++++++++++++++ examples/3_wavelet.py | 23 +++++++++++ examples/4_cstat.py | 23 +++++++++++ nuwavdet/nuwavdet.py | 23 +++++++++-- setup.py | 6 +-- test.ipynb | 62 ++++++++++++++++++++++++++++ 8 files changed, 263 insertions(+), 27 deletions(-) create mode 100644 examples/1_save_results.py create mode 100644 examples/2_directory_processing.py create mode 100644 examples/3_wavelet.py create mode 100644 examples/4_cstat.py create mode 100644 test.ipynb diff --git a/README.md b/README.md index c12bd50..a2b0477 100644 --- a/README.md +++ b/README.md @@ -4,34 +4,64 @@ This pacakge is used to generate region masks separating any focused X-ray flux ## Installation This package is to be used with Python 3.x.x -```python +```bash pip install git+http://heagit.cosmos.ru/nustar/nuwavdet.git ``` -## Main use +To update the package to the current version one should delete the previous version +```bash +pip uninstall nuwavdet +``` -To use the package in your project, import it in by writing: +And simply repeat the intallation procedure again from the repository. + +## Installation verification +If the installation was successful the package can be used with the following import: ```python from nuwavdet import nuwavdet as nw ``` -The main functionality of the pacakge is presented with a single function +To verify the installation we suggest running a simple script: + ```python -process(obs_path, thresh) +from nuwavdet import nuwavdet as nw + +print(nw.binary_array(2)) ``` + +The output of the script should be + +```bash +[[False False] + [False True] + [ True False] + [ True True]] +``` + +## Main use + +The main functionality of the package is presented with a single function +```python +nw.process(obs_path, thresh) +``` + Inputs are string with path to the _cl.evt file to use and a tuple of thresholds, e.g. ```python -process('D:\\Data\\obs_cl.evt', (3, 2)) +nw.process('D:\\Data\\obs_cl.evt', (3, 2)) ``` -Outputs of the function are: -1. dictionary with some metadata and properties of the observation after mask generation procedure. -2. region array with mask in DET1 coordinate frame. Note that this mask is for numpy mask application so 1 corresponds to masked pixel and 0 otherwise. -3. custom bad pixel table with flagged pixels in RAW coordinates. It can be exported as fits file or each separate table can be acessed directly. -4. array with the sum of wavelet planes used in the processing. +The detailed script description of the data extraction with the script is presented in the examples folder of the repository. -Metadata about the observation file: +The function nw.process returns severl python objects: +1. python-dictionary with some metadata and properties of the observation after mask generation procedure. +2. region array with mask in DET1 coordinate frame. Note that this mask is for numpy mask application so True (1) corresponds to masked pixel and False (0) otherwise. +3. custom bad pixel table with flagged pixels in RAW coordinates. It can be exported as fits file for further application to the nupipeline as fpma_userbpfile or fpmb_userbpfile. +4. array with the sum of wavelet planes for potential alternative applications. + +Metadata about the observation returned by the nw.process is: + +Observation metadata: 1. OBS_ID 2. Detector @@ -47,14 +77,8 @@ Useful algorythm-related data: ## Other uses -You can process the cl.evt file by creating an Observation class object: +Other possbile usecases are shown in the examples folder. -```python -obs = nw.Observation(path_to_evt_file) -``` +## Contact information -Additionally, the energy band in KeV to get events from can be passed as an argument. The default value is [3,20]. - -```python -obs = nuwavsource.Observation(path_to_evt_file,E_borders=[E_min,E_max]) -``` +If you have any questions or issues with the code, feel free to contact Andrey Mukhin: amukhin@cosmos.ru diff --git a/examples/1_save_results.py b/examples/1_save_results.py new file mode 100644 index 0000000..15ed00d --- /dev/null +++ b/examples/1_save_results.py @@ -0,0 +1,54 @@ +from nuwavdet import nuwavdet as nw + + +OBS_PATH = r'.//path_to_obs//nu01_cl.evt' +THRESH = (3, 2) + +SAVE_BADPIX_PATH = r'.//out//badpix.fits' +SAVE_REGION_PATH = r'.//out//region.fits' +SAVE_WAVSUM_PATH = r'.//out//wavsum.fits' + +METADATA_PATH = r'.//out//metadata.csv' +METADATA_FITS_PATH = r'.//out//metadata.fits' + + +if __name__ == '__main__': + # PROCESS THE OBSERVATION WITH GIVEN THRESHOLD + result, region, region_raw, wav_sum = nw.process(OBS_PATH, thresh=THRESH) + + # SAVE THE REGION BAD PIXEL FILES TO THE FITS FILE WITH NUPIPELINE + # COMPATIBLE FORMAT AND HEADERS. + region_raw.writeto(SAVE_BADPIX_PATH) + + # SAVE REGION MASK AS A FITS IMAGE + nw.save_region(region, SAVE_REGION_PATH, overwrite=False) + # Note that the Python script uses numpy masked array with + # True (1) as as masked and False (0) as unmasked pixel. + # nw.save_region transfers the numpy masked array to + # conventional format with 1 for unmasked and 0 for masked pixel. + # However, if mask is used in the Python you need to transfer it back with + # numpy.logical_not(mask). + + # SAVE WAVSUM ARRAY AS A FITS IMAGE + nw.fits.writeto(SAVE_WAVSUM_PATH, wav_sum, overwrite=False) + + # SAVE METADATA + # WE SUGGEST SAVING ALL THE METADATA FOR SEVERAL OBSERVATIONS + # IN ONE FILE. + + # CREATE CSV FILE TO SAVE DATA + # IF FILE ALREADY EXISTS YOU SHOULD REMOVE THIS BLOCK FROM YOUR CODE + table = { + 'obs_id': [], 'detector': [], 'ra': [], 'dec': [], + 'lon': [], 'lat': [], 't_start': [], 'exposure': [], + 'count_rate': [], 'remaining_area': [], 'cash_stat': [], + 'cash_stat_full': [] + } + out_table = nw.DataFrame(table) + out_table.to_csv(METADATA_PATH) + + # SAVE DATA TO CREATED CSV + nw.DataFrame(result, index=[0]).to_csv(METADATA_PATH, mode='a', header=False) + + # TRANSFORM THE CSV TO FITS-TABLE + nw.csv_to_table(METADATA_PATH, METADATA_FITS_PATH) diff --git a/examples/2_directory_processing.py b/examples/2_directory_processing.py new file mode 100644 index 0000000..d26b4d9 --- /dev/null +++ b/examples/2_directory_processing.py @@ -0,0 +1,33 @@ +from nuwavdet import nuwavdet as nw + +INPUT_FOLDER = r'path_to_directory' +OUTPUT_FOLDER = r'.//Output' + + +if __name__ == '__main__': + # BEGIN PROCESSING ALL THE OBSERVATIONS INSIDE THE FOLDER + nw.process_folder(input_folder=INPUT_FOLDER, + start_new_file='y', + fits_folder=OUTPUT_FOLDER, + thresh=(3, 2), + cpu_num=10 + ) + + # IF THE PROCESSING WAS INTERRUPTED YOU CAN CONTINUE IT WITH THE SAME CODE + # BY CHANGING THE start_new_file TO 'n'. + + # THE STRUCTURE OF THE FOLDER IS + # OUTPUT_FOLDER + # __overview.csv csv-table with observations metadata + # __overvies.fits fits-table with the same metadata + # __overview_skipped.csv csv-table with the skipped observations + # __Region folder for region mask images + # _____region.fits + # __Region_raw folder for region masks in RAW coordinates + # _____reg_raw.fits + # __Wav_sum folder for sum of wavelet layers + # _____wav_sum.fits + + # Note nw.process_folder uses multiprocessing with cpu_num cores. + # The number of cores can be manually chosen or automatically + # detected if cpu_num = 0. \ No newline at end of file diff --git a/examples/3_wavelet.py b/examples/3_wavelet.py new file mode 100644 index 0000000..3213f03 --- /dev/null +++ b/examples/3_wavelet.py @@ -0,0 +1,23 @@ +from nuwavdet import nuwavdet as nw + +OBS_PATH = r'.//path_to_obs//nu01_cl.evt' +THRESH = (3, 2) + + +if __name__ == '__main__': + # CREATE THE OBSERVATION CLASS OBJECT + obs = nw.Observation(OBS_PATH) + + # CALCULATE THE WAVLET LAYERS WITH GIVEN THRESHOLD + wav_layers = obs.wavdecomp(mode='atrous', occ_coeff=True, thresh=THRESH) + + # ALL THE LAYERS CAN BE ACCESSED AS AN ELEMENT OF wav_layers VARIABLE + # wav_layers[0] for the 1st wavelet layer + # wav_layers[4] for 5th wavelet layer + # wav_layers[-1] for the last wavelet layer + # wav_layers[2:5] for the list of the layers from 3 to 5 + # wav_layers[[1, 3, 5]] for the list of layers 2, 4 and 6 + + # To calculate the sum of wavelet layers one should use sum() method + # wav_layers[2:7].sum(0) returns a sum of layers from 3 to 7 + # wav_layers[[1, 3, 5]].sum(0) returns a sum of layers 2, 4 and 6. diff --git a/examples/4_cstat.py b/examples/4_cstat.py new file mode 100644 index 0000000..c9c0379 --- /dev/null +++ b/examples/4_cstat.py @@ -0,0 +1,23 @@ +from nuwavdet import nuwavdet as nw +import numpy as np + + +OBS_PATH = r'.//path_to_obs//nu01_cl.evt' +MASK_PATH = r'.//path_to_mask//.fits' + + +if __name__ == '__main__': + # CREATE THE OBSERVATION CLASS OBJECT + obs = nw.Observation(OBS_PATH) + + # READ THE REGION MASK FILE + region = nw.fits.getdata(MASK_PATH) + + # TRANSFORM REGION MASK DATA TO NUMPY MASK DATA (SEE 1_save_results.py). + region = np.logical_not(region.astype(bool)) + + # CREATE MASKED ARRAY CLASS OBJECT + masked_data = np.ma.masked_array(obs, mask=region) + + # CALCULATE THE CSTAT ON THE MASKED DATA + print(nw.сstat(masked_data.mean(), masked_data)) diff --git a/nuwavdet/nuwavdet.py b/nuwavdet/nuwavdet.py index 802aecf..c92c126 100644 --- a/nuwavdet/nuwavdet.py +++ b/nuwavdet/nuwavdet.py @@ -30,6 +30,14 @@ def get_link_list(folder: str, sort_list: bool = True) -> list[str]: return np.array(links) +def csv_to_table(csv_path, fits_path): + """ + Transform the csv table to fits table with astropy. + """ + csv_file = read_csv(csv_path, index_col=0, dtype={'obs_id': str}) + Table.from_pandas(csv_file).write(fits_path, overwrite=True) + + def binary_array(num: int) -> list[list[bool]]: """ Returns list of all possible combinations of num of bool values. @@ -199,7 +207,8 @@ def count_binning(array, count_per_bin: int = 2): def cstat(expected, data: list, count_per_bin: int = 2) -> float: _data = data.flatten() - _data = _data[_data.mask == False] + if type(data) is np.ma.masked_array: + _data = _data[_data.mask == False] _expected = expected c_stat = 0 bin_sum_array, bin_count_array = count_binning(_data, count_per_bin) @@ -493,7 +502,7 @@ def _process_multi(args): def process_folder(input_folder=None, start_new_file=None, fits_folder=None, - thresh=None): + thresh=None, cpu_num=0): """ Generates a fits-table of parameters, folder with mask images in DET1 and BADPIX tables in RAW for all observations in given folder. @@ -585,6 +594,14 @@ def process_folder(input_folder=None, start_new_file=None, fits_folder=None, # START PROCESSING print('Started processing...') num = 0 + if cpu_num == 0: + cpu_num = cpu_count() + elif cpu_num < 0: + raise ValueError('cpu_num must be a positive integer') + elif cpu_num > cpu_count(): + print('Chosen cpu_num exceed the number of CPU cores. Using cpu_count() instead.') + cpu_num = cpu_count() + for group_idx in range(len(obs_list)//group_size+1): print(f'Started group {group_idx}') group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1), len(obs_list))] @@ -592,7 +609,7 @@ def process_folder(input_folder=None, start_new_file=None, fits_folder=None, os.stat(file).st_size/2**20 for file in group_list ]).max() - process_num = (cpu_count() if max_size < 50 else (cpu_count()//2 if max_size < 200 else (cpu_count()//4 if max_size < 1000 else 1))) + process_num = (cpu_num if max_size < 50 else (cpu_num//2 if max_size < 200 else cpu_num//4 if max_size < 1000 else cpu_num//8)) print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes") with get_context('spawn').Pool(processes=process_num) as pool: packed_args = map(lambda _: (_, thresh), group_list) diff --git a/setup.py b/setup.py index 75fb40b..9d9b2e6 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,10 @@ with open("README.md", "r") as fh: setuptools.setup( name="nuwavdet", - version="0.1.0", + version="0.1.1", author="Andrey Mukhin", - author_email="amukhin@phystech.edu", - description="A package for source exclusion in NuStar observation data using wavelet decomposition", + author_email="amukhin@cosmos.ru", + description="A package for source exclusion in NuSTAR observation data using wavelet decomposition", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/andrey-rrousan/nuwavdet", diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 0000000..1718ef7 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from nuwavdet import nuwavdet as nw" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[False False]\n", + " [False True]\n", + " [ True False]\n", + " [ True True]]\n" + ] + } + ], + "source": [ + "print(nw.binary_array(2))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}