Added examples

This commit is contained in:
Andrey Mukhin 2023-07-05 15:19:46 +03:00
parent fd708d8170
commit 71bed55454
8 changed files with 263 additions and 27 deletions

View File

@ -4,34 +4,64 @@ This pacakge is used to generate region masks separating any focused X-ray flux
## Installation
This package is to be used with Python 3.x.x
```python
```bash
pip install git+http://heagit.cosmos.ru/nustar/nuwavdet.git
```
## Main use
To update the package to the current version one should delete the previous version
```bash
pip uninstall nuwavdet
```
To use the package in your project, import it in by writing:
And simply repeat the intallation procedure again from the repository.
## Installation verification
If the installation was successful the package can be used with the following import:
```python
from nuwavdet import nuwavdet as nw
```
The main functionality of the pacakge is presented with a single function
To verify the installation we suggest running a simple script:
```python
process(obs_path, thresh)
from nuwavdet import nuwavdet as nw
print(nw.binary_array(2))
```
The output of the script should be
```bash
[[False False]
[False True]
[ True False]
[ True True]]
```
## Main use
The main functionality of the package is presented with a single function
```python
nw.process(obs_path, thresh)
```
Inputs are string with path to the _cl.evt file to use and a tuple of thresholds, e.g.
```python
process('D:\\Data\\obs_cl.evt', (3, 2))
nw.process('D:\\Data\\obs_cl.evt', (3, 2))
```
Outputs of the function are:
1. dictionary with some metadata and properties of the observation after mask generation procedure.
2. region array with mask in DET1 coordinate frame. Note that this mask is for numpy mask application so 1 corresponds to masked pixel and 0 otherwise.
3. custom bad pixel table with flagged pixels in RAW coordinates. It can be exported as fits file or each separate table can be acessed directly.
4. array with the sum of wavelet planes used in the processing.
The detailed script description of the data extraction with the script is presented in the examples folder of the repository.
Metadata about the observation file:
The function nw.process returns severl python objects:
1. python-dictionary with some metadata and properties of the observation after mask generation procedure.
2. region array with mask in DET1 coordinate frame. Note that this mask is for numpy mask application so True (1) corresponds to masked pixel and False (0) otherwise.
3. custom bad pixel table with flagged pixels in RAW coordinates. It can be exported as fits file for further application to the nupipeline as fpma_userbpfile or fpmb_userbpfile.
4. array with the sum of wavelet planes for potential alternative applications.
Metadata about the observation returned by the nw.process is:
Observation metadata:
1. OBS_ID
2. Detector
@ -47,14 +77,8 @@ Useful algorythm-related data:
## Other uses
You can process the cl.evt file by creating an Observation class object:
Other possbile usecases are shown in the examples folder.
```python
obs = nw.Observation(path_to_evt_file)
```
## Contact information
Additionally, the energy band in KeV to get events from can be passed as an argument. The default value is [3,20].
```python
obs = nuwavsource.Observation(path_to_evt_file,E_borders=[E_min,E_max])
```
If you have any questions or issues with the code, feel free to contact Andrey Mukhin: amukhin@cosmos.ru

View File

@ -0,0 +1,54 @@
from nuwavdet import nuwavdet as nw
OBS_PATH = r'.//path_to_obs//nu<obsid><DET>01_cl.evt'
THRESH = (3, 2)
SAVE_BADPIX_PATH = r'.//out//badpix.fits'
SAVE_REGION_PATH = r'.//out//region.fits'
SAVE_WAVSUM_PATH = r'.//out//wavsum.fits'
METADATA_PATH = r'.//out//metadata.csv'
METADATA_FITS_PATH = r'.//out//metadata.fits'
if __name__ == '__main__':
# PROCESS THE OBSERVATION WITH GIVEN THRESHOLD
result, region, region_raw, wav_sum = nw.process(OBS_PATH, thresh=THRESH)
# SAVE THE REGION BAD PIXEL FILES TO THE FITS FILE WITH NUPIPELINE
# COMPATIBLE FORMAT AND HEADERS.
region_raw.writeto(SAVE_BADPIX_PATH)
# SAVE REGION MASK AS A FITS IMAGE
nw.save_region(region, SAVE_REGION_PATH, overwrite=False)
# Note that the Python script uses numpy masked array with
# True (1) as as masked and False (0) as unmasked pixel.
# nw.save_region transfers the numpy masked array to
# conventional format with 1 for unmasked and 0 for masked pixel.
# However, if mask is used in the Python you need to transfer it back with
# numpy.logical_not(mask).
# SAVE WAVSUM ARRAY AS A FITS IMAGE
nw.fits.writeto(SAVE_WAVSUM_PATH, wav_sum, overwrite=False)
# SAVE METADATA
# WE SUGGEST SAVING ALL THE METADATA FOR SEVERAL OBSERVATIONS
# IN ONE FILE.
# CREATE CSV FILE TO SAVE DATA
# IF FILE ALREADY EXISTS YOU SHOULD REMOVE THIS BLOCK FROM YOUR CODE
table = {
'obs_id': [], 'detector': [], 'ra': [], 'dec': [],
'lon': [], 'lat': [], 't_start': [], 'exposure': [],
'count_rate': [], 'remaining_area': [], 'cash_stat': [],
'cash_stat_full': []
}
out_table = nw.DataFrame(table)
out_table.to_csv(METADATA_PATH)
# SAVE DATA TO CREATED CSV
nw.DataFrame(result, index=[0]).to_csv(METADATA_PATH, mode='a', header=False)
# TRANSFORM THE CSV TO FITS-TABLE
nw.csv_to_table(METADATA_PATH, METADATA_FITS_PATH)

View File

@ -0,0 +1,33 @@
from nuwavdet import nuwavdet as nw
INPUT_FOLDER = r'path_to_directory'
OUTPUT_FOLDER = r'.//Output'
if __name__ == '__main__':
# BEGIN PROCESSING ALL THE OBSERVATIONS INSIDE THE FOLDER
nw.process_folder(input_folder=INPUT_FOLDER,
start_new_file='y',
fits_folder=OUTPUT_FOLDER,
thresh=(3, 2),
cpu_num=10
)
# IF THE PROCESSING WAS INTERRUPTED YOU CAN CONTINUE IT WITH THE SAME CODE
# BY CHANGING THE start_new_file TO 'n'.
# THE STRUCTURE OF THE FOLDER IS
# OUTPUT_FOLDER
# __overview.csv csv-table with observations metadata
# __overvies.fits fits-table with the same metadata
# __overview_skipped.csv csv-table with the skipped observations
# __Region folder for region mask images
# ____<obsid><DET>_region.fits
# __Region_raw folder for region masks in RAW coordinates
# ____<obsid><DET>_reg_raw.fits
# __Wav_sum folder for sum of wavelet layers
# ____<obsid><DET>_wav_sum.fits
# Note nw.process_folder uses multiprocessing with cpu_num cores.
# The number of cores can be manually chosen or automatically
# detected if cpu_num = 0.

23
examples/3_wavelet.py Normal file
View File

@ -0,0 +1,23 @@
from nuwavdet import nuwavdet as nw
OBS_PATH = r'.//path_to_obs//nu<obsid><DET>01_cl.evt'
THRESH = (3, 2)
if __name__ == '__main__':
# CREATE THE OBSERVATION CLASS OBJECT
obs = nw.Observation(OBS_PATH)
# CALCULATE THE WAVLET LAYERS WITH GIVEN THRESHOLD
wav_layers = obs.wavdecomp(mode='atrous', occ_coeff=True, thresh=THRESH)
# ALL THE LAYERS CAN BE ACCESSED AS AN ELEMENT OF wav_layers VARIABLE
# wav_layers[0] for the 1st wavelet layer
# wav_layers[4] for 5th wavelet layer
# wav_layers[-1] for the last wavelet layer
# wav_layers[2:5] for the list of the layers from 3 to 5
# wav_layers[[1, 3, 5]] for the list of layers 2, 4 and 6
# To calculate the sum of wavelet layers one should use sum() method
# wav_layers[2:7].sum(0) returns a sum of layers from 3 to 7
# wav_layers[[1, 3, 5]].sum(0) returns a sum of layers 2, 4 and 6.

23
examples/4_cstat.py Normal file
View File

@ -0,0 +1,23 @@
from nuwavdet import nuwavdet as nw
import numpy as np
OBS_PATH = r'.//path_to_obs//nu<obsid><DET>01_cl.evt'
MASK_PATH = r'.//path_to_mask//<obsid><DET>.fits'
if __name__ == '__main__':
# CREATE THE OBSERVATION CLASS OBJECT
obs = nw.Observation(OBS_PATH)
# READ THE REGION MASK FILE
region = nw.fits.getdata(MASK_PATH)
# TRANSFORM REGION MASK DATA TO NUMPY MASK DATA (SEE 1_save_results.py).
region = np.logical_not(region.astype(bool))
# CREATE MASKED ARRAY CLASS OBJECT
masked_data = np.ma.masked_array(obs, mask=region)
# CALCULATE THE CSTAT ON THE MASKED DATA
print(nw.сstat(masked_data.mean(), masked_data))

View File

@ -30,6 +30,14 @@ def get_link_list(folder: str, sort_list: bool = True) -> list[str]:
return np.array(links)
def csv_to_table(csv_path, fits_path):
"""
Transform the csv table to fits table with astropy.
"""
csv_file = read_csv(csv_path, index_col=0, dtype={'obs_id': str})
Table.from_pandas(csv_file).write(fits_path, overwrite=True)
def binary_array(num: int) -> list[list[bool]]:
"""
Returns list of all possible combinations of num of bool values.
@ -199,7 +207,8 @@ def count_binning(array, count_per_bin: int = 2):
def cstat(expected, data: list, count_per_bin: int = 2) -> float:
_data = data.flatten()
_data = _data[_data.mask == False]
if type(data) is np.ma.masked_array:
_data = _data[_data.mask == False]
_expected = expected
c_stat = 0
bin_sum_array, bin_count_array = count_binning(_data, count_per_bin)
@ -493,7 +502,7 @@ def _process_multi(args):
def process_folder(input_folder=None, start_new_file=None, fits_folder=None,
thresh=None):
thresh=None, cpu_num=0):
"""
Generates a fits-table of parameters, folder with mask images in DET1 and
BADPIX tables in RAW for all observations in given folder.
@ -585,6 +594,14 @@ def process_folder(input_folder=None, start_new_file=None, fits_folder=None,
# START PROCESSING
print('Started processing...')
num = 0
if cpu_num == 0:
cpu_num = cpu_count()
elif cpu_num < 0:
raise ValueError('cpu_num must be a positive integer')
elif cpu_num > cpu_count():
print('Chosen cpu_num exceed the number of CPU cores. Using cpu_count() instead.')
cpu_num = cpu_count()
for group_idx in range(len(obs_list)//group_size+1):
print(f'Started group {group_idx}')
group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1), len(obs_list))]
@ -592,7 +609,7 @@ def process_folder(input_folder=None, start_new_file=None, fits_folder=None,
os.stat(file).st_size/2**20
for file in group_list
]).max()
process_num = (cpu_count() if max_size < 50 else (cpu_count()//2 if max_size < 200 else (cpu_count()//4 if max_size < 1000 else 1)))
process_num = (cpu_num if max_size < 50 else (cpu_num//2 if max_size < 200 else cpu_num//4 if max_size < 1000 else cpu_num//8))
print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes")
with get_context('spawn').Pool(processes=process_num) as pool:
packed_args = map(lambda _: (_, thresh), group_list)

View File

@ -5,10 +5,10 @@ with open("README.md", "r") as fh:
setuptools.setup(
name="nuwavdet",
version="0.1.0",
version="0.1.1",
author="Andrey Mukhin",
author_email="amukhin@phystech.edu",
description="A package for source exclusion in NuStar observation data using wavelet decomposition",
author_email="amukhin@cosmos.ru",
description="A package for source exclusion in NuSTAR observation data using wavelet decomposition",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/andrey-rrousan/nuwavdet",

62
test.ipynb Normal file
View File

@ -0,0 +1,62 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from nuwavdet import nuwavdet as nw"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[False False]\n",
" [False True]\n",
" [ True False]\n",
" [ True True]]\n"
]
}
],
"source": [
"print(nw.binary_array(2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}