ridge/scripts/03_plot_skyreg.py

#!/usr/bin/env python

__author__      = "Roman Krivonos"
__copyright__   = "Space Research Institute (IKI)"

import numpy as np
import pandas as pd
from astropy.io import fits
from astropy.table import Table, Column
import matplotlib.pyplot as plt
import math, sys
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold

#from statsmodels.robust.scale import huber
from astropy.stats import sigma_clip
from astropy.stats import sigma_clipped_stats

from scipy.stats import norm
from scipy.stats import describe
from scipy.stats import sem


from numpy import absolute
from numpy import arange

from ridge.utils import *
from ridge.config import *

skey = sys.argv[1]
enkey = sys.argv[2]

if not skey in skyreg.keys():
    print("{} not found in {}".format(skey,list(skyreg.keys())))
    sys.exit()

key = "ALL"

fn="detcnts.{}.{}.resid.fits".format(enkey,key)
print("Reading {}".format(proddir+fn))
dat = Table.read(proddir+fn)
df = dat.to_pandas()


n_bins = 60
sigma=3

df = df.query('LON > {} & LON < {} & LAT > {} & LAT < {} & BASE < {}'.format(skyreg[skey]['lon'] - skyreg[skey]['wlon']/2,
                                                                                 skyreg[skey]['lon'] + skyreg[skey]['wlon']/2,
                                                                                 skyreg[skey]['lat'] - skyreg[skey]['wlat']/2,
                                                                                 skyreg[skey]['lat'] + skyreg[skey]['wlat']/2,
                                                                                 basemin))
print("Selected {} ScWs".format(df.shape[0]))

plt.scatter(df['LON'],df['LAT'])
plt.title(skey)
plt.ylabel("GLAT")
plt.xlabel("GLON")
plt.show()

plt.scatter(df['REV'],df['BASE'])
plt.title("Nearest orbit with measured background")
plt.ylabel("Base")
plt.xlabel("Orbit")
plt.show()

rev = np.array(df['REV'])
grxe = np.array(df['GRXE'])
grxe_err = np.array(df['GRXE_ERR'])


mean = np.mean(grxe)
std = np.std(grxe)
print("mean {:.2f} std {:.2f}".format(mean,std))
print("min    {:.2f}".format(grxe.min()))
print("max    {:.2f}".format(grxe.max()))
print("mean   {:.2f}".format(grxe.mean()))
print("median {:.2f}".format(np.median(grxe)))
print("var    {:.2f}".format(grxe.var()))
sstr = '%-14s mean = %6.4f, variance = %6.4f, skew = %6.4f, kurtosis = %6.4f'
n, (smin, smax), sm, sv, ss, sk = describe(grxe)
print(sstr % ('sample:', sm, sv, ss, sk))

# Calculate the percentiles across the x dimension
errmax=np.max(grxe_err)
perc = np.percentile(grxe_err, grxe_err_cut, axis=0, keepdims=False)
print("{} {}: {}% cut of GRXE ERR: {:.2f} mCrab".format(skey,enkey,sem_cut,perc))
idx=np.where(grxe_err < perc)

plt.hist(grxe_err, bins=n_bins, range=[0,errmax], color="red")
rev=rev[idx]
grxe=grxe[idx]
grxe_err=grxe_err[idx]
plt.hist(grxe_err, bins=n_bins, range=[0,errmax], color="grey")

plt.xlabel("GRXE ERROR, mCrab")
plt.title("Distribution of errors {}".format(skey))
plt.show()

grxe_sign=np.divide(grxe,grxe_err)
plt.hist(grxe_sign, bins=n_bins)
plt.xlabel("GRXE S/N")
plt.title("Distribution of significance {}".format(skey))
plt.show()

A=np.sum(grxe/grxe_err**2)
B=np.sum(1.0/grxe_err**2)
wgt_mean=A/B
wgt_mean_err=np.sqrt(1.0/B)

print("Weighted mean: {:.2f}+/-{:.2f} normal mean: {:.2f}".format(wgt_mean,wgt_mean_err,np.mean(grxe)))

#filtered_grxe = sigma_clip(grxe, sigma=sigma, maxiters=10)
filtered_data = sigma_clip(grxe, sigma=sigma, maxiters=10, return_bounds=True)
filtered_grxe = filtered_data[0]
filtered_min  = filtered_data[1]
filtered_max  = filtered_data[2]

sg_mean, sg_med, sg_std = sigma_clipped_stats(grxe, sigma=sigma, maxiters=10)
sg_sem = sem(grxe[filtered_grxe.mask==False])

print("Sigma clipping: mean {:.2f} med {:.2f} std {:.2f} sem {:.2f}".format(sg_mean, sg_med, sg_std, sg_sem))
k=1.2
plt.hist(grxe, bins=n_bins, range=[filtered_min*k, filtered_max*k])
plt.hist(grxe[filtered_grxe.mask], bins=n_bins, range=[filtered_min*k, filtered_max*k])
plt.axvline(sg_mean, color="black")
plt.axvline(sg_mean+sg_std, color="blue", linestyle="dashed")
plt.axvline(sg_mean-sg_std, color="blue", linestyle="dashed")
plt.axvline(sg_mean+sg_sem, color="black", linestyle="dashed")
plt.axvline(sg_mean-sg_sem, color="black", linestyle="dashed")
plt.xlabel("GRXE, mCrab")
plt.title(skey)
plt.show()

# show cumulative mean
indices = sorted(
    range(len(rev)),
    key=lambda index: rev[index]
)
rev_sorted = [rev[index] for index in indices]
grxe_sorted = [grxe[index] for index in indices]

plt.scatter(rev_sorted,grxe_sorted)
plt.axhline(sg_mean, color="black")
plt.axhline(sg_mean+sg_std, color="blue", linestyle="dashed")
plt.axhline(sg_mean-sg_std, color="blue", linestyle="dashed")
plt.axhline(sg_mean+sg_sem, color="black", linestyle="dashed")
plt.axhline(sg_mean-sg_sem, color="black", linestyle="dashed")
plt.ylim([filtered_min*k, filtered_max*k])
#plt.xlim([60, 100])
plt.title("GRXE as a function of orbit")
plt.ylabel("GRXE, mCrab")
plt.xlabel("Orbit")
plt.show()


#grxe_cum = np.cumsum(grxe_sorted, dtype=float, axis=0)
#grxe_cum = cum_mean(grxe_sorted)
grxe_cum = cum_mean_rev(rev_sorted,grxe_sorted)
plt.scatter(rev_sorted,grxe_cum)
plt.plot(rev_sorted,grxe_cum)
plt.axhline(sg_mean, color="black")
plt.axhline(grxe.mean(), color="green", linestyle="dashed")
plt.axhline(sg_mean+sg_std, color="blue", linestyle="dashed")
plt.axhline(sg_mean-sg_std, color="blue", linestyle="dashed")
plt.axhline(sg_mean+sg_sem, color="black", linestyle="dashed")
plt.axhline(sg_mean-sg_sem, color="black", linestyle="dashed")
plt.ylim([filtered_min*k, filtered_max*k])
plt.title("Cumulative mean as a function of orbit")
plt.ylabel("GRXE, mCrab")
plt.xlabel("Orbit")
plt.show()