ridge/scripts/00_stats.py

81 lines
3.1 KiB
Python
Executable File

#!/usr/bin/env python
__author__ = "Roman Krivonos"
__copyright__ = "Space Research Institute (IKI)"
import numpy as np
import pandas as pd
from astropy.io import fits
import matplotlib.pyplot as plt
import math, sys, os
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
#from statsmodels.robust.scale import huber
from astropy.stats import sigma_clip
from numpy import absolute
from numpy import arange
from astropy.table import Table, Column
from ridge.utils import *
from ridge.config import *
#enkey = sys.argv[1]
enkey="A01"
inkey="ALL"
fn="detcnts.{}.fits".format(enkey)
print("Reading {}".format(datadir+fn))
with fits.open(datadir+fn) as data:
df = pd.DataFrame(data[1].data)
print(df.columns)
df_bkg = df.query('REV >= {} & REV< {} & CLEAN > 0.0 & ( abs(LAT) > {} | abs(LON) > {}) & PHASE > {} & PHASE < {}'.format(revmin,revmax,bmax,lmax,phmin,phmax))
df_gal = df.query('REV >= {} & REV< {} & CLEAN > 0.0 & abs(LAT) < {} & abs(LON) < {} & PHASE > {} & PHASE < {}'.format(revmin,revmax,bmax,lmax,phmin,phmax))
df_tot = df.query('REV >= {} & REV< {} & CLEAN > 0.0 & PHASE > {} & PHASE < {}'.format(revmin,revmax,phmin,phmax))
print("\n Initial data set")
print(" GAL {} ScWs, {:.1f} Ms".format(df_gal.shape[0], np.sum(df_gal['EXPOSURE'])/1e6))
print(" BKG {} ScWs, {:.1f} Ms".format(df_bkg.shape[0], np.sum(df_bkg['EXPOSURE'])/1e6))
print("Total {} ScWs, {:.1f} Ms".format(df_tot.shape[0], np.sum(df_tot['EXPOSURE'])/1e6))
fn="detcnts.{}.{}.resid.fits".format(enkey,inkey)
print("Reading {}".format(proddir+fn))
dat = Table.read(proddir+fn)
df = dat.to_pandas()
df_bkg = df.query('REV >= {} & REV< {} & CLEAN > 0.0 & ( abs(LAT) > {} | abs(LON) > {}) & PHASE > {} & PHASE < {}'.format(revmin,revmax,bmax,lmax,phmin,phmax))
df_gal = df.query('REV >= {} & REV< {} & CLEAN > 0.0 & abs(LAT) < {} & abs(LON) < {} & PHASE > {} & PHASE < {}'.format(revmin,revmax,bmax,lmax,phmin,phmax))
df_tot = df.query('REV >= {} & REV< {} & CLEAN > 0.0 & PHASE > {} & PHASE < {}'.format(revmin,revmax,phmin,phmax))
print("\n Residuals data set")
print(" GAL {} ScWs, {:.1f} Ms".format(df_gal.shape[0], np.sum(df_gal['TEXP'])/1e6))
print(" BKG {} ScWs, {:.1f} Ms".format(df_bkg.shape[0], np.sum(df_bkg['TEXP'])/1e6))
print("Total {} ScWs, {:.1f} Ms".format(df_tot.shape[0], np.sum(df_tot['TEXP'])/1e6))
# read ignored orbits
with open(ignored_rev_file, 'rb') as fp:
ignored_rev = pickle.load(fp)
print("{} orbits ignored".format(len(ignored_rev)))
ign=ignored_rev.tolist()
query = "REV != @ign"
df_bkg = df_bkg.query(query)
df_gal = df_gal.query(query)
df_tot = df_tot.query(query)
print("\n After removing ignored orbits:")
print(" GAL {} ScWs, {:.1f} Ms".format(df_gal.shape[0], np.sum(df_gal['TEXP'])/1e6))
print(" BKG {} ScWs, {:.1f} Ms".format(df_bkg.shape[0], np.sum(df_bkg['TEXP'])/1e6))
print("Total {} ScWs, {:.1f} Ms".format(df_tot.shape[0], np.sum(df_tot['TEXP'])/1e6))