#!/usr/bin/env python __author__ = "Roman Krivonos" __copyright__ = "Space Research Institute (IKI)" import numpy as np import pandas as pd from astropy.io import fits from astropy.table import Table, Column import matplotlib.pyplot as plt import math, sys import pickle from sklearn.linear_model import LinearRegression from sklearn.linear_model import HuberRegressor from sklearn.linear_model import RANSACRegressor from sklearn.linear_model import TheilSenRegressor from sklearn.model_selection import cross_val_score from sklearn.model_selection import RepeatedKFold #from statsmodels.robust.scale import huber from astropy.stats import sigma_clip from numpy import absolute from numpy import arange from ridge.utils import * from ridge.config import * plotme=True enkey = sys.argv[1] #outkey = sys.argv[2] outkey = "ALL" fn="detcnts.{}.fits".format(enkey) with open(proddir+fn.replace(".fits",".pkl"), 'rb') as fp: bgdmodel = pickle.load(fp) with open(proddir+fn.replace(".fits",".ignored_scw.pkl"), 'rb') as fp: ignored_scw = pickle.load(fp) with open(proddir+fn.replace(".fits",".ignored_rev.pkl"), 'rb') as fp: ignored_rev = pickle.load(fp) with open(proddir+fn.replace(".fits",".crabmodel.pkl"), 'rb') as fp: crabmodel, z = pickle.load(fp) p = np.poly1d(z) crabmodel_keys = list(crabmodel.keys()) #print(crabmodel) #sys.exit() crab_rev_max = np.max(list(crabmodel.keys())) print("Crab is defined untill orbit {}".format(crab_rev_max)) with fits.open(datadir+fn) as hdul: data=hdul[1].data #print(data.columns) rev = data.field('rev') mjd = data.field('mjd') clean = data.field('clean') phase = data.field('phase') texp = data.field('exposure') obsid0=[] rev0=[] phase0=[] clean0=[] model0=[] resid0=[] # residuals in cts/s grxe0=[] # mCrab grxe_err0=[] # mCrab crab0=[] # Crab count rate mjd0=[] a0=[] b0=[] model_err0=[] crab_err0=[] lon0=[] lat0=[] base0=[] c0=[] texp0=[] #d = fits.getdata(datadir+fn) #df = pd.DataFrame(np.array(d).byteswap().newbyteorder()) with fits.open(datadir+fn) as data: df = pd.DataFrame(data[1].data) # BKG if(outkey == 'BKG'): df = df.query('CLEAN > 0.0 & ( abs(LAT) > {} | abs(LON) > {}) & PHASE > {} & PHASE < {}'.format(bmax,lmax,phmin,phmax)) # GAL if(outkey=='GAL'): df = df.query('CLEAN > 0.0 & abs(LAT) < {} & abs(LON) < {} & PHASE > {} & PHASE < {}'.format(bmax,lmax,phmin,phmax)) # ALL if(outkey=='ALL'): df = df.query('CLEAN > 0.0 & PHASE > {} & PHASE < {}'.format(phmin,phmax)) for i, row in df.iterrows(): orbit=row['REV'] obsid=row['OBSID']#.decode("UTF-8") if not (orbit > revmin and orbit < revmax): print("Skip orbit",orbit,row['OBSID']) continue if not (orbit < crab_rev_max): print("Skip orbit",orbit,obsid) continue if (obsid in ignored_scw): print("Skip ScW",obsid) continue if (orbit in ignored_rev): print("Skip REV",orbit) continue a = bgdmodel[orbit]['a'] b = bgdmodel[orbit]['b'] c = bgdmodel[orbit]['c'] # Crab error is defined only for Crab resolutions, so we interpolate between if(orbit in crabmodel_keys): crab_err = crabmodel[orbit]['err'] else: left,right = find_nearest(crabmodel_keys, orbit) crab_err = np.interp(orbit, [left,right], [crabmodel[left]['err'], crabmodel[right]['err']]) #print() #print(orbit, left, right) #print(orbit, 'err', crabmodel[left]['err'], crab_err, crabmodel[right]['err']) #sys.exit() err = bgdmodel[orbit]['err'] m = a*row['PHASE']+b r1 = bgdmodel[orbit]['r1'] # nearest left orbit used for calibration r2 = bgdmodel[orbit]['r2'] # nearest right orbit used for calibration obsid0.append(obsid) c0.append(c) base0.append(abs(orbit - int(np.min([r1,r2])))) clean0.append(clean[i]) mjd0.append(mjd[i]) texp0.append(texp[i]) model0.append(m) resid0.append(clean[i]-m) grxe0.append(1000*(clean[i]-m)/p(orbit)) grxe_err0.append(1000*np.sqrt(err**2 + crab_err**2)/p(orbit)) crab0.append(p(orbit)) a0.append(a) b0.append(b) c0.append(c) model_err0.append(err) crab_err0.append(crab_err) phase0.append(row['PHASE']) rev0.append(orbit) lon0.append(row['LON']) lat0.append(row['LAT']) rev_min=np.min(rev0) rev_max=np.max(rev0) orbits=np.array(rev0) resid_arr=np.array(resid0) distr_val=[] distr_rev=[] for r in range(rev_min,rev_max): ind = np.nonzero(orbits == r) if not (len(resid_arr[ind])>10): continue distr_val.append(np.mean(resid_arr[ind])) distr_rev.append(r) dval=np.array(distr_val) drev=np.array(distr_rev) sigma=2 n_bins=20 filtered_data = sigma_clip(dval, sigma=sigma, maxiters=10, return_bounds=True) filtered_arr=filtered_data[0] filtered_min=filtered_data[1] filtered_max=filtered_data[2] print("Sigma clipping: min {:.2e} max {:.2e}".format(filtered_min,filtered_max)) print("length orig: {} taken: {} filtered: {}".format(len(dval),len(dval[filtered_arr.mask==False]),len(dval[filtered_arr.mask==True]))) sg_mean, sg_med, sg_std = sigma_clipped_stats(distr_val, sigma=sigma, maxiters=10) sg_sem = sem(dval[filtered_arr.mask==False]) print("Sigma clipping: mean {:.2e} med {:.2e} std {:.2e} ".format(sg_mean, sg_med, sg_std)) print(len(drev[filtered_arr.mask==True])) if(plotme): k=1.2 plt.hist(dval, bins=n_bins, range=[filtered_min*k, filtered_max*k]) plt.hist(dval[filtered_arr.mask], bins=n_bins, range=[filtered_min*k, filtered_max*k]) plt.axvline(sg_mean, color="black") #plt.axvline(sg_mean+sg_sem, color="black", linestyle="dashed") #plt.axvline(sg_mean-sg_sem, color="black", linestyle="dashed") plt.axvline(sg_mean+sg_std, color="blue", linestyle="dashed") plt.axvline(sg_mean-sg_std, color="blue", linestyle="dashed") plt.xlabel("Resid") plt.show() with open(proddir+fn.replace(".fits",".ignored_rev.resid.pkl"), 'wb') as fp: pickle.dump(drev[filtered_arr.mask==True], fp, protocol=pickle.HIGHEST_PROTOCOL) print("Removed REVs:",drev[filtered_arr.mask==True]) indices = sorted( range(len(mjd0)), key=lambda index: mjd0[index] ) coldefs = fits.ColDefs([ fits.Column(name='OBSID', format='11A', array=[obsid0[index] for index in indices]), #fits.Column(name='RA', format='D', unit='deg', array=[ra[index] for index in indices]), #fits.Column(name='DEC', format='D', unit='deg', array=[dec[index] for index in indices]), fits.Column(name='LON', format='D', unit='deg', array=[lon0[index] for index in indices]), fits.Column(name='LAT', format='D', unit='deg', array=[lat0[index] for index in indices]), fits.Column(name='REV', format='J', unit='', array=[rev0[index] for index in indices]), fits.Column(name='BASE', format='J', unit='', array=[base0[index] for index in indices]), # nearest BKG fits.Column(name='MJD', format='D', unit='', array=[mjd0[index] for index in indices]), fits.Column(name='TEXP', format='D', unit='', array=[texp0[index] for index in indices]), fits.Column(name='PHASE', format='D', unit='', array=[phase0[index] for index in indices]), fits.Column(name='CLEAN', format='D', unit='cts/s', array=[clean0[index] for index in indices]), fits.Column(name='MODEL', format='D', unit='cts/s', array=[model0[index] for index in indices]), fits.Column(name='MODEL_ERR', format='D', unit='', array=[model_err0[index] for index in indices]), fits.Column(name='RESID', format='D', unit='cts/s', array=[resid0[index] for index in indices]), fits.Column(name='GRXE', format='D', unit='mCrab', array=[grxe0[index] for index in indices]), fits.Column(name='GRXE_ERR', format='D', unit='mCrab', array=[grxe_err0[index] for index in indices]), fits.Column(name='CRAB', format='D', unit='cts/s', array=[crab0[index] for index in indices]), fits.Column(name='CRAB_ERR', format='D', unit='', array=[crab_err0[index] for index in indices]), fits.Column(name='A', format='D', unit='', array=[a0[index] for index in indices]), fits.Column(name='B', format='D', unit='', array=[b0[index] for index in indices]), fits.Column(name='C', format='D', unit='', array=[c0[index] for index in indices]), ]) fout = fn.replace(".fits",".{}.resid.fits".format(outkey)) hdu = fits.BinTableHDU.from_columns(coldefs, name='GRXE') hdu.header['MISSION'] = ('INTEGRAL', '') hdu.header['TELESCOP'] = (outkey, '') hdu.header['INSTITUT'] = ('IKI', 'Affiliation') hdu.header['AUTHOR'] = ('Roman Krivonos', 'Responsible person') hdu.header['EMAIL'] = ('krivonos@cosmos.ru', 'E-mail') #hdu.add_checksum() print(hdu.columns) hdu.writeto(proddir+fout, overwrite=True) with fits.open(proddir+fout, mode='update') as hdus: hdus[1].add_checksum() coldefs = fits.ColDefs([ fits.Column(name='REV', format='J', unit='', array=drev), fits.Column(name='RESID', format='D', unit='cts/s', array=dval), fits.Column(name='MASK', format='L', unit='', array=filtered_arr.mask), ]) fout = fn.replace(".fits",".{}.resid_by_rev.fits".format(outkey)) hdu = fits.BinTableHDU.from_columns(coldefs, name='GRXE') hdu.header['MISSION'] = ('INTEGRAL', '') hdu.header['TELESCOP'] = (outkey, '') hdu.header['INSTITUT'] = ('IKI', 'Affiliation') hdu.header['AUTHOR'] = ('Roman Krivonos', 'Responsible person') hdu.header['EMAIL'] = ('krivonos@cosmos.ru', 'E-mail') #hdu.add_checksum() print(hdu.columns) hdu.writeto(proddir+fout, overwrite=True) with fits.open(proddir+fout, mode='update') as hdus: hdus[1].add_checksum()