From 71bed5545467582216a3775fb925381d0b471d86 Mon Sep 17 00:00:00 2001
From: Andrey Mukhin <amukhin@phystech.edu>
Date: Wed, 5 Jul 2023 15:19:46 +0300
Subject: [PATCH] Added examples

---
 README.md                          | 66 ++++++++++++++++++++----------
 examples/1_save_results.py         | 54 ++++++++++++++++++++++++
 examples/2_directory_processing.py | 33 +++++++++++++++
 examples/3_wavelet.py              | 23 +++++++++++
 examples/4_cstat.py                | 23 +++++++++++
 nuwavdet/nuwavdet.py               | 23 +++++++++--
 setup.py                           |  6 +--
 test.ipynb                         | 62 ++++++++++++++++++++++++++++
 8 files changed, 263 insertions(+), 27 deletions(-)
 create mode 100644 examples/1_save_results.py
 create mode 100644 examples/2_directory_processing.py
 create mode 100644 examples/3_wavelet.py
 create mode 100644 examples/4_cstat.py
 create mode 100644 test.ipynb

diff --git a/README.md b/README.md
index c12bd50..a2b0477 100644
--- a/README.md
+++ b/README.md
@@ -4,34 +4,64 @@ This pacakge is used to generate region masks separating any focused X-ray flux
 
 ## Installation
 This package is to be used with Python 3.x.x
-```python
+```bash
 pip install git+http://heagit.cosmos.ru/nustar/nuwavdet.git
 ```
 
-## Main use
+To update the package to the current version one should delete the previous version
+```bash
+pip uninstall nuwavdet
+```
 
-To use the package in your project, import it in by writing:
+And simply repeat the intallation procedure again from the repository.
+
+## Installation verification
+If the installation was successful the package can be used with the following import:
 
 ```python
 from nuwavdet import nuwavdet as nw
 ```
 
-The main functionality of the pacakge is presented with a single function
+To verify the installation we suggest running a simple script:
+
 ```python
-process(obs_path, thresh)
+from nuwavdet import nuwavdet as nw
+
+print(nw.binary_array(2))
 ```
+
+The output of the script should be
+
+```bash
+[[False False]
+ [False  True]
+ [ True False]
+ [ True  True]]
+```
+
+## Main use
+
+The main functionality of the package is presented with a single function
+```python
+nw.process(obs_path, thresh)
+```
+
 Inputs are string with path to the _cl.evt file to use and a tuple of thresholds, e.g.
 ```python
-process('D:\\Data\\obs_cl.evt', (3, 2))
+nw.process('D:\\Data\\obs_cl.evt', (3, 2))
 ```
 
-Outputs of the function are:
-1. dictionary with some metadata and properties of the observation after mask generation procedure.
-2. region array with mask in DET1 coordinate frame. Note that this mask is for numpy mask application so 1 corresponds to masked pixel and 0 otherwise.
-3. custom bad pixel table with flagged pixels in RAW coordinates. It can be exported as fits file or each separate table can be acessed directly.
-4. array with the sum of wavelet planes used in the processing.
+The detailed script description of the data extraction with the script is presented in the examples folder of the repository.
 
-Metadata about the observation file:
+The function nw.process returns severl python objects:
+1. python-dictionary with some metadata and properties of the observation after mask generation procedure.
+2. region array with mask in DET1 coordinate frame. Note that this mask is for numpy mask application so  True (1) corresponds to masked pixel and False (0) otherwise.
+3. custom bad pixel table with flagged pixels in RAW coordinates. It can be exported as fits file for further application to the nupipeline as fpma_userbpfile or fpmb_userbpfile. 
+4. array with the sum of wavelet planes for potential alternative applications.
+
+Metadata about the observation returned by the nw.process is:
+
+Observation metadata: 
 
 1. OBS_ID
 2. Detector
@@ -47,14 +77,8 @@ Useful algorythm-related data:
 
 ## Other uses
 
-You can process the cl.evt file by creating an Observation class object:
+Other possbile usecases are shown in the examples folder.
 
-```python
-obs = nw.Observation(path_to_evt_file)
-```
+## Contact information
 
-Additionally, the energy band in KeV to get events from can be passed as an argument. The default value is [3,20].
-
-```python
-obs = nuwavsource.Observation(path_to_evt_file,E_borders=[E_min,E_max])
-```
+If you have any questions or issues with the code, feel free to contact Andrey Mukhin: amukhin@cosmos.ru
diff --git a/examples/1_save_results.py b/examples/1_save_results.py
new file mode 100644
index 0000000..15ed00d
--- /dev/null
+++ b/examples/1_save_results.py
@@ -0,0 +1,54 @@
+from nuwavdet import nuwavdet as nw
+
+
+OBS_PATH = r'.//path_to_obs//nu<obsid><DET>01_cl.evt'
+THRESH = (3, 2)
+
+SAVE_BADPIX_PATH = r'.//out//badpix.fits'
+SAVE_REGION_PATH = r'.//out//region.fits'
+SAVE_WAVSUM_PATH = r'.//out//wavsum.fits'
+
+METADATA_PATH = r'.//out//metadata.csv'
+METADATA_FITS_PATH = r'.//out//metadata.fits'
+
+
+if __name__ == '__main__':
+    # PROCESS THE OBSERVATION WITH GIVEN THRESHOLD
+    result, region, region_raw, wav_sum = nw.process(OBS_PATH, thresh=THRESH)
+
+    # SAVE THE REGION BAD PIXEL FILES TO THE FITS FILE WITH NUPIPELINE
+    # COMPATIBLE FORMAT AND HEADERS.
+    region_raw.writeto(SAVE_BADPIX_PATH)
+
+    # SAVE REGION MASK AS A FITS IMAGE
+    nw.save_region(region, SAVE_REGION_PATH, overwrite=False)
+    # Note that the Python script uses numpy masked array with
+    # True (1) as as masked and False (0) as unmasked pixel.
+    # nw.save_region transfers the numpy masked array to
+    # conventional format with 1 for unmasked and 0 for masked pixel.
+    # However, if mask is used in the Python you need to transfer it back with
+    # numpy.logical_not(mask).
+
+    # SAVE WAVSUM ARRAY AS A FITS IMAGE
+    nw.fits.writeto(SAVE_WAVSUM_PATH, wav_sum, overwrite=False)
+
+    # SAVE METADATA
+    # WE SUGGEST SAVING ALL THE METADATA FOR SEVERAL OBSERVATIONS
+    # IN ONE FILE.
+
+    # CREATE CSV FILE TO SAVE DATA
+    # IF FILE ALREADY EXISTS YOU SHOULD REMOVE THIS BLOCK FROM YOUR CODE
+    table = {
+        'obs_id': [], 'detector': [], 'ra': [], 'dec': [],
+        'lon': [], 'lat': [], 't_start': [], 'exposure': [],
+        'count_rate': [], 'remaining_area': [], 'cash_stat': [],
+        'cash_stat_full': []
+    }
+    out_table = nw.DataFrame(table)
+    out_table.to_csv(METADATA_PATH)
+
+    # SAVE DATA TO CREATED CSV
+    nw.DataFrame(result, index=[0]).to_csv(METADATA_PATH, mode='a', header=False)
+
+    # TRANSFORM THE CSV TO FITS-TABLE
+    nw.csv_to_table(METADATA_PATH, METADATA_FITS_PATH)
diff --git a/examples/2_directory_processing.py b/examples/2_directory_processing.py
new file mode 100644
index 0000000..d26b4d9
--- /dev/null
+++ b/examples/2_directory_processing.py
@@ -0,0 +1,33 @@
+from nuwavdet import nuwavdet as nw
+
+INPUT_FOLDER = r'path_to_directory'
+OUTPUT_FOLDER = r'.//Output'
+
+
+if __name__ == '__main__':
+    # BEGIN PROCESSING ALL THE OBSERVATIONS INSIDE THE FOLDER
+    nw.process_folder(input_folder=INPUT_FOLDER,
+                      start_new_file='y',
+                      fits_folder=OUTPUT_FOLDER,
+                      thresh=(3, 2),
+                      cpu_num=10
+                      )
+
+    # IF THE PROCESSING WAS INTERRUPTED YOU CAN CONTINUE IT WITH THE SAME CODE
+    # BY CHANGING THE start_new_file TO 'n'.
+
+    # THE STRUCTURE OF THE FOLDER IS
+    # OUTPUT_FOLDER
+    # __overview.csv                csv-table with observations metadata
+    # __overvies.fits               fits-table with the same metadata
+    # __overview_skipped.csv        csv-table with the skipped observations
+    # __Region                      folder for region mask images
+    # ____<obsid><DET>_region.fits
+    # __Region_raw                  folder for region masks in RAW coordinates
+    # ____<obsid><DET>_reg_raw.fits
+    # __Wav_sum                     folder for sum of wavelet layers
+    # ____<obsid><DET>_wav_sum.fits
+
+    # Note nw.process_folder uses multiprocessing with cpu_num cores.
+    # The number of cores can be manually chosen or automatically
+    # detected if cpu_num = 0. 
\ No newline at end of file
diff --git a/examples/3_wavelet.py b/examples/3_wavelet.py
new file mode 100644
index 0000000..3213f03
--- /dev/null
+++ b/examples/3_wavelet.py
@@ -0,0 +1,23 @@
+from nuwavdet import nuwavdet as nw
+
+OBS_PATH = r'.//path_to_obs//nu<obsid><DET>01_cl.evt'
+THRESH = (3, 2)
+
+
+if __name__ == '__main__':
+    # CREATE THE OBSERVATION CLASS OBJECT
+    obs = nw.Observation(OBS_PATH)
+
+    # CALCULATE THE WAVLET LAYERS WITH GIVEN THRESHOLD
+    wav_layers = obs.wavdecomp(mode='atrous', occ_coeff=True, thresh=THRESH)
+
+    # ALL THE LAYERS CAN BE ACCESSED AS AN ELEMENT OF wav_layers VARIABLE
+    # wav_layers[0] for the 1st wavelet layer
+    # wav_layers[4] for 5th wavelet layer
+    # wav_layers[-1] for the last wavelet layer
+    # wav_layers[2:5] for the list of the layers from 3 to 5
+    # wav_layers[[1, 3, 5]] for the list of layers 2, 4 and 6
+
+    # To calculate the sum of wavelet layers one should use sum() method
+    # wav_layers[2:7].sum(0) returns a sum of layers from 3 to 7
+    # wav_layers[[1, 3, 5]].sum(0) returns a sum of layers 2, 4 and 6.
diff --git a/examples/4_cstat.py b/examples/4_cstat.py
new file mode 100644
index 0000000..c9c0379
--- /dev/null
+++ b/examples/4_cstat.py
@@ -0,0 +1,23 @@
+from nuwavdet import nuwavdet as nw
+import numpy as np
+
+
+OBS_PATH = r'.//path_to_obs//nu<obsid><DET>01_cl.evt'
+MASK_PATH = r'.//path_to_mask//<obsid><DET>.fits'
+
+
+if __name__ == '__main__':
+    # CREATE THE OBSERVATION CLASS OBJECT
+    obs = nw.Observation(OBS_PATH)
+
+    # READ THE REGION MASK FILE
+    region = nw.fits.getdata(MASK_PATH)
+
+    # TRANSFORM REGION MASK DATA TO NUMPY MASK DATA (SEE 1_save_results.py).
+    region = np.logical_not(region.astype(bool))
+
+    # CREATE MASKED ARRAY CLASS OBJECT
+    masked_data = np.ma.masked_array(obs, mask=region)
+
+    # CALCULATE THE CSTAT ON THE MASKED DATA
+    print(nw.сstat(masked_data.mean(), masked_data))
diff --git a/nuwavdet/nuwavdet.py b/nuwavdet/nuwavdet.py
index 802aecf..c92c126 100644
--- a/nuwavdet/nuwavdet.py
+++ b/nuwavdet/nuwavdet.py
@@ -30,6 +30,14 @@ def get_link_list(folder: str, sort_list: bool = True) -> list[str]:
         return np.array(links)
 
 
+def csv_to_table(csv_path, fits_path):
+    """
+    Transform the csv table to fits table with astropy.
+    """
+    csv_file = read_csv(csv_path, index_col=0, dtype={'obs_id': str})
+    Table.from_pandas(csv_file).write(fits_path, overwrite=True)
+
+
 def binary_array(num: int) -> list[list[bool]]:
     """
     Returns list of all possible combinations of num of bool values.
@@ -199,7 +207,8 @@ def count_binning(array, count_per_bin: int = 2):
 
 def cstat(expected, data: list, count_per_bin: int = 2) -> float:
     _data = data.flatten()
-    _data = _data[_data.mask == False]
+    if type(data) is np.ma.masked_array:
+        _data = _data[_data.mask == False]
     _expected = expected
     c_stat = 0
     bin_sum_array, bin_count_array = count_binning(_data, count_per_bin)
@@ -493,7 +502,7 @@ def _process_multi(args):
 
 
 def process_folder(input_folder=None, start_new_file=None, fits_folder=None,
-                   thresh=None):
+                   thresh=None, cpu_num=0):
     """
     Generates a fits-table of parameters, folder with mask images in DET1 and
     BADPIX tables in RAW for all observations in given folder.
@@ -585,6 +594,14 @@ def process_folder(input_folder=None, start_new_file=None, fits_folder=None,
     # START PROCESSING
     print('Started processing...')
     num = 0
+    if cpu_num == 0:
+        cpu_num = cpu_count()
+    elif cpu_num < 0:
+        raise ValueError('cpu_num must be a positive integer')
+    elif cpu_num > cpu_count():
+        print('Chosen cpu_num exceed the number of CPU cores. Using cpu_count() instead.')
+        cpu_num = cpu_count()
+
     for group_idx in range(len(obs_list)//group_size+1):
         print(f'Started group {group_idx}')
         group_list = obs_list[group_size*group_idx:min(group_size*(group_idx+1), len(obs_list))]
@@ -592,7 +609,7 @@ def process_folder(input_folder=None, start_new_file=None, fits_folder=None,
             os.stat(file).st_size/2**20
             for file in group_list
         ]).max()
-        process_num = (cpu_count() if max_size < 50 else (cpu_count()//2 if max_size < 200 else (cpu_count()//4 if max_size < 1000 else 1)))
+        process_num = (cpu_num if max_size < 50 else (cpu_num//2 if max_size < 200 else cpu_num//4 if max_size < 1000 else cpu_num//8))
         print(f"Max file size in group is {max_size:.2f}Mb, create {process_num} processes")
         with get_context('spawn').Pool(processes=process_num) as pool:
             packed_args = map(lambda _: (_, thresh), group_list)
diff --git a/setup.py b/setup.py
index 75fb40b..9d9b2e6 100644
--- a/setup.py
+++ b/setup.py
@@ -5,10 +5,10 @@ with open("README.md", "r") as fh:
 
 setuptools.setup(
     name="nuwavdet",
-    version="0.1.0",
+    version="0.1.1",
     author="Andrey Mukhin",
-    author_email="amukhin@phystech.edu",
-    description="A package for source exclusion in NuStar observation data using wavelet decomposition",
+    author_email="amukhin@cosmos.ru",
+    description="A package for source exclusion in NuSTAR observation data using wavelet decomposition",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/andrey-rrousan/nuwavdet",
diff --git a/test.ipynb b/test.ipynb
new file mode 100644
index 0000000..1718ef7
--- /dev/null
+++ b/test.ipynb
@@ -0,0 +1,62 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nuwavdet import nuwavdet as nw"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[False False]\n",
+      " [False  True]\n",
+      " [ True False]\n",
+      " [ True  True]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(nw.binary_array(2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}