#!/usr/bin/env python __author__ = "Roman Krivonos" __copyright__ = "Space Research Institute (IKI)" import numpy as np import pandas as pd from astropy.io import fits from astropy.table import Table, Column import matplotlib.pyplot as plt import math, sys import pickle from sklearn.linear_model import LinearRegression from sklearn.linear_model import HuberRegressor from sklearn.linear_model import RANSACRegressor from sklearn.linear_model import TheilSenRegressor from sklearn.model_selection import cross_val_score from sklearn.model_selection import RepeatedKFold #from statsmodels.robust.scale import huber from astropy.stats import sigma_clip from astropy.stats import sigma_clipped_stats from scipy.stats import norm from scipy.stats import describe from scipy.stats import sem from numpy import absolute from numpy import arange from ridge.utils import * from ridge.config import * key="BKG" enkey = sys.argv[1] fn="detcnts.{}.{}.resid.fits".format(enkey,key) print("Reading {}".format(proddir+fn)) dat = Table.read(proddir+fn) df = dat.to_pandas() print("Number of ScWs: {}".format(df.shape[0])) n_bins = 80 clean = np.array(df['CLEAN']) model = np.array(df['MODEL']) grxe = (clean-model)/clean*100 mean = np.mean(grxe) std = np.std(grxe) print("\n*** Unfiltered:") print("mean {:.2f} std {:.2f}".format(mean,std)) print("min {:.2f}".format(grxe.min())) print("max {:.2f}".format(grxe.max())) print("mean {:.2f}".format(grxe.mean())) print("median {:.2f}".format(np.median(grxe))) print("var {:.2f}".format(grxe.var())) sstr = '%-14s mean = %6.4f, variance = %6.4f, skew = %6.4f, kurtosis = %6.4f' n, (smin, smax), sm, sv, ss, sk = describe(grxe) print(sstr % ('sample:', sm, sv, ss, sk)) print("\n***") filtered_data = sigma_clip(grxe, sigma=sigma, maxiters=10, return_bounds=True) filtered_grxe=filtered_data[0] filtered_min=filtered_data[1] filtered_max=filtered_data[2] print("length orig: {} taken: {} filtered: {}".format(len(grxe),len(grxe[filtered_grxe.mask==False]),len(grxe[filtered_grxe.mask==True]))) sg_mean, sg_med, sg_std = sigma_clipped_stats(grxe, sigma=sigma, maxiters=10) sg_sem = sem(grxe[filtered_grxe.mask==False]) print("Sigma clipping: mean {:.2f} med {:.2f} std {:.2f} sem {:.2f}".format(sg_mean, sg_med, sg_std, sg_sem)) k=1.2 plt.hist(grxe, bins=n_bins, range=[filtered_min*k, filtered_max*k]) plt.hist(grxe[filtered_grxe.mask], bins=n_bins, range=[filtered_min*k, filtered_max*k]) plt.axvline(sg_mean, color="black") plt.axvline(sg_mean+sg_sem, color="black", linestyle="dashed") plt.axvline(sg_mean-sg_sem, color="black", linestyle="dashed") plt.axvline(sg_mean+sg_std, color="blue", linestyle="dashed") plt.axvline(sg_mean-sg_std, color="blue", linestyle="dashed") plt.xlabel("mCrab") plt.show()