#!/usr/bin/env python __author__ = "Roman Krivonos" __copyright__ = "Space Research Institute (IKI)" import numpy as np import pandas as pd from astropy.io import fits from astropy.table import Table, Column import matplotlib.pyplot as plt import math, sys import pickle from sklearn.linear_model import LinearRegression from sklearn.linear_model import HuberRegressor from sklearn.linear_model import RANSACRegressor from sklearn.linear_model import TheilSenRegressor from sklearn.model_selection import cross_val_score from sklearn.model_selection import RepeatedKFold #from statsmodels.robust.scale import huber from astropy.stats import sigma_clip from astropy.stats import sigma_clipped_stats from scipy.stats import norm from scipy.stats import describe from scipy.stats import sem from numpy import absolute from numpy import arange from ridge.utils import * from ridge.config import * skey = sys.argv[1] enkey = sys.argv[2] if not skey in skyreg.keys(): print("{} not found in {}".format(skey,list(skyreg.keys()))) sys.exit() key = "ALL" fn="detcnts.{}.{}.resid.fits".format(enkey,key) print("Reading {}".format(proddir+fn)) dat = Table.read(proddir+fn) df = dat.to_pandas() n_bins = 60 sigma=3 df = df.query('LON > {} & LON < {} & LAT > {} & LAT < {} & BASE < {}'.format(skyreg[skey]['lon'] - skyreg[skey]['wlon']/2, skyreg[skey]['lon'] + skyreg[skey]['wlon']/2, skyreg[skey]['lat'] - skyreg[skey]['wlat']/2, skyreg[skey]['lat'] + skyreg[skey]['wlat']/2, basemin)) print("Selected {} ScWs".format(df.shape[0])) plt.scatter(df['LON'],df['LAT']) plt.title(skey) plt.ylabel("GLAT") plt.xlabel("GLON") plt.show() plt.scatter(df['REV'],df['BASE']) plt.title("Nearest orbit with measured background") plt.ylabel("Base") plt.xlabel("Orbit") plt.show() rev = np.array(df['REV']) grxe = np.array(df['GRXE']) grxe_err = np.array(df['GRXE_ERR']) mean = np.mean(grxe) std = np.std(grxe) print("mean {:.2f} std {:.2f}".format(mean,std)) print("min {:.2f}".format(grxe.min())) print("max {:.2f}".format(grxe.max())) print("mean {:.2f}".format(grxe.mean())) print("median {:.2f}".format(np.median(grxe))) print("var {:.2f}".format(grxe.var())) sstr = '%-14s mean = %6.4f, variance = %6.4f, skew = %6.4f, kurtosis = %6.4f' n, (smin, smax), sm, sv, ss, sk = describe(grxe) print(sstr % ('sample:', sm, sv, ss, sk)) # Calculate the percentiles across the x dimension errmax=np.max(grxe_err) perc = np.percentile(grxe_err, grxe_err_cut, axis=0, keepdims=False) print("{} {}: {}% cut of GRXE ERR: {:.2f} mCrab".format(skey,enkey,sem_cut,perc)) idx=np.where(grxe_err < perc) plt.hist(grxe_err, bins=n_bins, range=[0,errmax], color="red") rev=rev[idx] grxe=grxe[idx] grxe_err=grxe_err[idx] plt.hist(grxe_err, bins=n_bins, range=[0,errmax], color="grey") plt.xlabel("GRXE ERROR, mCrab") plt.title("Distribution of errors {}".format(skey)) plt.show() grxe_sign=np.divide(grxe,grxe_err) plt.hist(grxe_sign, bins=n_bins) plt.xlabel("GRXE S/N") plt.title("Distribution of significance {}".format(skey)) plt.show() A=np.sum(grxe/grxe_err**2) B=np.sum(1.0/grxe_err**2) wgt_mean=A/B wgt_mean_err=np.sqrt(1.0/B) print("Weighted mean: {:.2f}+/-{:.2f} normal mean: {:.2f}".format(wgt_mean,wgt_mean_err,np.mean(grxe))) #filtered_grxe = sigma_clip(grxe, sigma=sigma, maxiters=10) filtered_data = sigma_clip(grxe, sigma=sigma, maxiters=10, return_bounds=True) filtered_grxe = filtered_data[0] filtered_min = filtered_data[1] filtered_max = filtered_data[2] sg_mean, sg_med, sg_std = sigma_clipped_stats(grxe, sigma=sigma, maxiters=10) sg_sem = sem(grxe[filtered_grxe.mask==False]) print("Sigma clipping: mean {:.2f} med {:.2f} std {:.2f} sem {:.2f}".format(sg_mean, sg_med, sg_std, sg_sem)) k=1.2 plt.hist(grxe, bins=n_bins, range=[filtered_min*k, filtered_max*k]) plt.hist(grxe[filtered_grxe.mask], bins=n_bins, range=[filtered_min*k, filtered_max*k]) plt.axvline(sg_mean, color="black") plt.axvline(sg_mean+sg_std, color="blue", linestyle="dashed") plt.axvline(sg_mean-sg_std, color="blue", linestyle="dashed") plt.axvline(sg_mean+sg_sem, color="black", linestyle="dashed") plt.axvline(sg_mean-sg_sem, color="black", linestyle="dashed") plt.xlabel("GRXE, mCrab") plt.title(skey) plt.show() # show cumulative mean indices = sorted( range(len(rev)), key=lambda index: rev[index] ) rev_sorted = [rev[index] for index in indices] grxe_sorted = [grxe[index] for index in indices] plt.scatter(rev_sorted,grxe_sorted) plt.axhline(sg_mean, color="black") plt.axhline(sg_mean+sg_std, color="blue", linestyle="dashed") plt.axhline(sg_mean-sg_std, color="blue", linestyle="dashed") plt.axhline(sg_mean+sg_sem, color="black", linestyle="dashed") plt.axhline(sg_mean-sg_sem, color="black", linestyle="dashed") plt.ylim([filtered_min*k, filtered_max*k]) #plt.xlim([60, 100]) plt.title("GRXE as a function of orbit") plt.ylabel("GRXE, mCrab") plt.xlabel("Orbit") plt.show() #grxe_cum = np.cumsum(grxe_sorted, dtype=float, axis=0) #grxe_cum = cum_mean(grxe_sorted) grxe_cum = cum_mean_rev(rev_sorted,grxe_sorted) plt.scatter(rev_sorted,grxe_cum) plt.plot(rev_sorted,grxe_cum) plt.axhline(sg_mean, color="black") plt.axhline(grxe.mean(), color="green", linestyle="dashed") plt.axhline(sg_mean+sg_std, color="blue", linestyle="dashed") plt.axhline(sg_mean-sg_std, color="blue", linestyle="dashed") plt.axhline(sg_mean+sg_sem, color="black", linestyle="dashed") plt.axhline(sg_mean-sg_sem, color="black", linestyle="dashed") plt.ylim([filtered_min*k, filtered_max*k]) plt.title("Cumulative mean as a function of orbit") plt.ylabel("GRXE, mCrab") plt.xlabel("Orbit") plt.show()