ridge/scripts/02_grxe_resid_plot.py

#!/usr/bin/env python

__author__      = "Roman Krivonos"
__copyright__   = "Space Research Institute (IKI)"

import numpy as np
import pandas as pd
from astropy.io import fits
from astropy.table import Table, Column
from astropy import units as u

import matplotlib.pyplot as plt
import math, sys, os
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold

#from statsmodels.robust.scale import huber
from astropy.stats import sigma_clip
from astropy.stats import sigma_clipped_stats

from scipy.stats import norm
from scipy.stats import describe
from scipy.stats import sem
import subprocess

from numpy import absolute
from numpy import arange

from ridge.utils import *
from ridge.config import *

inkey="BKG"
skey="GRXE-BKG"

sigma=3
plotme=False


with open(ignored_rev_file, 'rb') as fp:
    ignored_rev = pickle.load(fp)
    print("{} orbits ignored".format(len(ignored_rev)))

ign=ignored_rev.tolist()

enkey="A01"
fn="detcnts.{}.{}.resid.fits".format(enkey,inkey)
dat = Table.read(proddir+fn)
df = dat.to_pandas()
print("N={}".format(df.shape[0]))
query = "REV != @ign"

df = df.query(query)
print("{} N={}".format(query, df.shape[0]))

t = Table.from_pandas(df)
t.write("{}/{}.{}.resid_filtered_rev.fits".format(proddir,inkey,enkey),overwrite=True)

fresid1="{}/{}.{}.resid_filtered_spec.fits".format(proddir,inkey,enkey)
sg_mean,sg_sem,skew_val,skew_err = get_spec(df, sigma=sigma,  grxe_err_cut=grxe_err_cut, skey=skey, enkey=enkey, plotme=True, gaussfit=True, fout=fresid1)


enkey="A02"
fn="detcnts.{}.{}.resid.fits".format(enkey,inkey)
dat = Table.read(proddir+fn)
df = dat.to_pandas()
print("N={}".format(df.shape[0]))
query = "REV != @ign"

df = df.query(query)
print("{} N={}".format(query, df.shape[0]))

t = Table.from_pandas(df)
t.write("{}/{}.{}.resid_filtered_rev.fits".format(proddir,inkey,enkey),overwrite=True)

fresid2="{}/{}.{}.resid_filtered_spec.fits".format(proddir,inkey,enkey)
sg_mean,sg_sem, skew_val, skew_err = get_spec(df, sigma=sigma,  grxe_err_cut=grxe_err_cut, skey=skey, enkey=enkey, plotme=True, gaussfit=True, fout=fresid2)


enkey="A03"
fn="detcnts.{}.{}.resid.fits".format(enkey,inkey)
dat = Table.read(proddir+fn)
df = dat.to_pandas()
print("N={}".format(df.shape[0]))
query = "REV != @ign"

df = df.query(query)
print("{} N={}".format(query, df.shape[0]))

t = Table.from_pandas(df)
t.write("{}/{}.{}.resid_filtered_rev.fits".format(proddir,inkey,enkey),overwrite=True)

fresid3="{}/{}.{}.resid_filtered_spec.fits".format(proddir,inkey,enkey)
sg_mean,sg_sem, skew_val, skew_err = get_spec(df, sigma=sigma,  grxe_err_cut=grxe_err_cut, skey=skey, enkey=enkey, plotme=True, gaussfit=True, fout=fresid3)

###
### Plot light curve
###

scale=1.0E-2
dat = Table.read(fresid1)
df1 = dat.to_pandas().sort_values(by=['REV'])

dat = Table.read(fresid2)
df2 = dat.to_pandas().sort_values(by=['REV'])

dat = Table.read(fresid3)
df3 = dat.to_pandas().sort_values(by=['REV'])

s=2
fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, figsize=(9, 7), dpi=100)
#fig.suptitle('Vertically stacked subplots')
#plt.figure(figsize=(8, 6), dpi=80)

for axis in ['top','bottom','left','right']:
    ax1.spines[axis].set_linewidth(1)
    ax2.spines[axis].set_linewidth(1)
    ax3.spines[axis].set_linewidth(1)
ax1.tick_params(axis="both", width=1, labelsize=14)
ax2.tick_params(axis="both", width=1, labelsize=14)
ax3.tick_params(axis="both", width=1, labelsize=14)

ax1.set_title("25-60 keV")
ax2.set_title("60-80 keV")
ax3.set_title("80-200 keV")

ax1.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
ax2.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
ax3.ticklabel_format(style='sci', axis='y', scilimits=(0,0))

ax1.scatter(df1['REV']+df1['PHASE'], df1['CLEAN']/scale, s=s, marker="o", color='r', linewidth=2)
ax1.scatter(df1['REV']+df1['PHASE'], df1['MODEL']/scale, s=s, marker="v", color='g', linewidth=2)
ax1.grid(visible=True)


ax2.scatter(df2['REV']+df2['PHASE'], df2['CLEAN']/scale, s=s, marker="o", color='r', linewidth=2)
ax2.scatter(df2['REV']+df2['PHASE'], df2['MODEL']/scale, s=s, color='g', linewidth=2)
ax2.grid(visible=True)

ax3.scatter(df3['REV']+df3['PHASE'], df3['CLEAN']/scale, s=s, marker="o", color='r', linewidth=2)
ax3.scatter(df3['REV']+df3['PHASE'], df3['MODEL']/scale, s=s, color='g', linewidth=2)
ax3.grid(visible=True)

#x = np.arange(-10, 10, 0.001)
#plot normal distribution with mean 0 and standard deviation 1
#plt.plot(x, norm.pdf(x, 0, 1), color='red', linewidth=2)

plt.xlabel('Revolution',fontsize=14, fontweight='normal')
ax2.set_ylabel('Count rate, x10$^{-2}$ cts s$^{-1}$ pix$^{-1}$',fontsize=14, fontweight='normal')

#plt.xscale('linear')
#plt.yscale('linear')

plt.savefig(proddir+'bkgmodel_lightcurve.png', bbox_inches='tight')
plt.close(fig)

###
### Plot distribution
###

nbins=100
fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=False, figsize=(9, 7), dpi=100)
#fig.suptitle('Vertically stacked subplots')
#plt.figure(figsize=(8, 6), dpi=80)

for axis in ['top','bottom','left','right']:
    ax1.spines[axis].set_linewidth(1)
    ax2.spines[axis].set_linewidth(1)
    ax3.spines[axis].set_linewidth(1)
ax1.tick_params(axis="both", width=1, labelsize=14)
ax2.tick_params(axis="both", width=1, labelsize=14)
ax3.tick_params(axis="both", width=1, labelsize=14)

ax1.ticklabel_format(style='sci', axis='y', scilimits=(-3,4))
ax2.ticklabel_format(style='sci', axis='y', scilimits=(-4,4))
ax3.ticklabel_format(style='sci', axis='y', scilimits=(-4,4))

data=df1['GRXE']
(mu, sg) = norm.fit(data)
print(mu, sg)
txt="25-60 keV\n$\\sigma=${:.0f} mCrab".format(sg)
ax1.text(35,8.5e-3,txt,fontsize=14)
n, bins, patches = ax1.hist(data, nbins, density=True, facecolor='green', alpha=0.75)
y = norm.pdf(bins, mu, sg)
l = ax1.plot(bins, y, 'r--', linewidth=2)
#plot
ax1.axvline(mu, color="black", linewidth=2)
ax1.axvline(mu+sg, color="blue", linestyle="dashed", linewidth=2)
ax1.axvline(mu-sg, color="blue", linestyle="dashed", linewidth=2)
ax1.grid(visible=True)

data=df2['GRXE']
(mu, sg) = norm.fit(data)
print(mu, sg)
txt="60-80 keV\n$\\sigma=${:.0f} mCrab".format(sg)
ax2.text(150,2.5e-3,txt,fontsize=14)
n, bins, patches = ax2.hist(data, nbins, density=True, facecolor='green', alpha=0.75)
y = norm.pdf(bins, mu, sg)
l = ax2.plot(bins, y, 'r--', linewidth=2)
#plot
ax2.axvline(mu, color="black", linewidth=2)
ax2.axvline(mu+sg, color="blue", linestyle="dashed", linewidth=2)
ax2.axvline(mu-sg, color="blue", linestyle="dashed", linewidth=2)
ax2.grid(visible=True)

data=df3['GRXE']
(mu, sg) = norm.fit(data)
print(mu, sg)

txt="80-200 keV\n$\\sigma=${:.0f} mCrab".format(sg)
ax3.text(200,2.5e-3,txt,fontsize=14)
n, bins, patches = ax3.hist(data, nbins, density=True, facecolor='green', alpha=0.75)
# add a 'best fit' line
y = norm.pdf(bins, mu, sg)
l = ax3.plot(bins, y, 'r--', linewidth=2)

area = np.sum(n * np.diff(bins))
xdata = bins[:-1]+np.diff(bins)/2
ydata = n
print("Initial Gaiss fit: mu={:.2f} sigma={:.2f}".format(mu,sg))
y_peak = norm.pdf(mu, mu, sg)
params=[
    y_peak*area, # height
    mu, # mu
    sg, # sigma
    0.0, # const 1
    0.0, # const 2
]
pfit, perr = fit_leastsq(params, xdata, ydata, const_gaussian_ff)
#plt.plot(bins, const_gaussian_ff(bins, pfit), c='black' )

#plot
ax3.axvline(mu, color="black", linewidth=2)
ax3.axvline(mu+sg, color="blue", linestyle="dashed", linewidth=2)
ax3.axvline(mu-sg, color="blue", linestyle="dashed", linewidth=2)
ax3.grid(visible=True)

#x = np.arange(-10, 10, 0.001)
#plot normal distribution with mean 0 and standard deviation 1
#plt.plot(x, norm.pdf(x, 0, 1), color='red', linewidth=2)

plt.xlabel('Flux, mCrab',fontsize=14, fontweight='normal')
#ax2.set_ylabel('No, x10$^{-3}$ cts s$^{-1}$ pix$^{-1}$',fontsize=14, fontweight='normal')

#plt.xscale('linear')
plt.yscale('linear')

filename=figdir+'bkgmodel_histogram.png'
plt.savefig(filename, bbox_inches='tight')
plt.close(fig)
print("\nResult is saved as {}".format(filename))


###
### Plot distribution of systematics
###

nbins=100
fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, figsize=(9, 7), dpi=100)
#fig.suptitle('Vertically stacked subplots')
#plt.figure(figsize=(8, 6), dpi=80)

for axis in ['top','bottom','left','right']:
    ax1.spines[axis].set_linewidth(1)
    ax2.spines[axis].set_linewidth(1)
    ax3.spines[axis].set_linewidth(1)
ax1.tick_params(axis="both", width=1, labelsize=14)
ax2.tick_params(axis="both", width=1, labelsize=14)
ax3.tick_params(axis="both", width=1, labelsize=14)

ax1.ticklabel_format(style='sci', axis='y', scilimits=(-3,4))
ax2.ticklabel_format(style='sci', axis='y', scilimits=(-4,4))
ax3.ticklabel_format(style='sci', axis='y', scilimits=(-4,4))

data=(df1['CLEAN']-df1['MODEL'])/df1['CLEAN']*100
(mu, sg) = norm.fit(data)
#print(mu, sg)
txt="25-60 keV, $\\sigma=${:.1f}%".format(sg)
ax1.set_title(txt)
n, bins, patches = ax1.hist(data, nbins, density=True, facecolor='green', alpha=0.75)
y = norm.pdf(bins, mu, sg)
l = ax1.plot(bins, y, 'r--', linewidth=2)
#plot
ax1.axvline(mu, color="black", linewidth=2)
ax1.axvline(mu+sg, color="blue", linestyle="dashed", linewidth=2)
ax1.axvline(mu-sg, color="blue", linestyle="dashed", linewidth=2)
ax1.grid(visible=True)


data=(df2['CLEAN']-df2['MODEL'])/df2['CLEAN']*100
(mu, sg) = norm.fit(data)
#print(mu, sg)
txt="60-80 keV, $\\sigma=${:.1f}%".format(sg)
ax2.set_title(txt)
n, bins, patches = ax2.hist(data, nbins, density=True, facecolor='green', alpha=0.75)
y = norm.pdf(bins, mu, sg)
l = ax2.plot(bins, y, 'r--', linewidth=2)
#plot
ax2.axvline(mu, color="black", linewidth=2)
ax2.axvline(mu+sg, color="blue", linestyle="dashed", linewidth=2)
ax2.axvline(mu-sg, color="blue", linestyle="dashed", linewidth=2)
ax2.grid(visible=True)


data=(df3['CLEAN']-df3['MODEL'])/df3['CLEAN']*100

(mu, sg) = norm.fit(data)
#print(mu, sg)

txt="80-200 keV, $\\sigma=${:.1f}%".format(sg)
ax3.set_title(txt)
n, bins, patches = ax3.hist(data, nbins, density=True, facecolor='green', alpha=0.75)
# add a 'best fit' line
y = norm.pdf(bins, mu, sg)
l = ax3.plot(bins, y, 'r--', linewidth=2)
#plot
ax3.axvline(mu, color="black", linewidth=2)
ax3.axvline(mu+sg, color="blue", linestyle="dashed", linewidth=2)
ax3.axvline(mu-sg, color="blue", linestyle="dashed", linewidth=2)
ax3.grid(visible=True)
ax3.set_xlim(-5,5)

#x = np.arange(-10, 10, 0.001)
#plot normal distribution with mean 0 and standard deviation 1
#plt.plot(x, norm.pdf(x, 0, 1), color='red', linewidth=2)

plt.xlabel('Residuals, %',fontsize=14, fontweight='normal')
#ax2.set_ylabel('No, x10$^{-3}$ cts s$^{-1}$ pix$^{-1}$',fontsize=14, fontweight='normal')

#plt.xscale('linear')
#plt.yscale('linear')

if not os.path.exists(figdir):
    os.makedirs(figdir)

filename=figdir+'bkgmodel_systematic.png'
plt.savefig(filename, bbox_inches='tight')
plt.close(fig)
print("Result is saved as {}".format(filename))