#!/usr/bin/env python __author__ = "Roman Krivonos" __copyright__ = "Space Research Institute (IKI)" import numpy as np import pandas as pd from astropy.io import fits import matplotlib.pyplot as plt import math, sys, os import pickle from sklearn.linear_model import LinearRegression from sklearn.linear_model import HuberRegressor from sklearn.linear_model import RANSACRegressor from sklearn.linear_model import TheilSenRegressor from sklearn.model_selection import cross_val_score from sklearn.model_selection import RepeatedKFold #from statsmodels.robust.scale import huber from astropy.stats import sigma_clip from numpy import absolute from numpy import arange from ridge.utils import * from ridge.config import * enkey = sys.argv[1] fn="detcnts.{}.fits".format(enkey) print("Reading {}".format(datadir+fn)) #d = fits.getdata(datadir+fn) #df=pd.DataFrame(np.array(d).byteswap().newbyteorder()) with fits.open(datadir+fn) as data: df = pd.DataFrame(data[1].data) print(df.columns) #interp_a = np.interp(50, [0,100], [0, 1]) #print(interp_a) #sys.exit() plotme=False sigma=3 ntotal=0 nrev=0 bgdmodel={} ignored_scw=[] ignored_rev=[] # ignore orbits with pecular slope over phase if not os.path.exists(proddir): os.makedirs(proddir) for rev in range(revmin,revmax): # if not (rev==341): # continue df0 = df.query('CLEAN > 0.0 & REV == {} & ( abs(LAT) > {} | abs(LON) > {}) & PHASE > {} & PHASE < {}'.format(rev,bmax,lmax,phmin,phmax)) #df0 = df.query('CLEAN > 0.0 & REV == {} & abs(LAT) > {} & abs(LON) > {} & PHASE > {} & PHASE < {}'.format(rev,bmax,lmax,phmin,phmax)) nobs=len(df0) print("*** REV {} *** {} ScWs".format(rev,df0.shape[0])) if not(nobs): continue ntotal=ntotal+nobs nrev=nrev+1 phase_diff0=max(df0['PHASE'])-min(df0['PHASE']) x = np.array(df0['PHASE'].values) y = np.array(df0['CLEAN'].values) scw = np.array(df0['OBSID'].values) if(nobs >= nmax): print("Phase diff: {:.2f} max allowed: {:.2f}".format(phase_diff0, phase_diff)) if(phase_diff0 > phase_diff): c = 0 """ run regression """ print("*** Run regression for {}".format(rev)) #print(df0['CLEAN'].values) x = x.reshape((-1, 1)) # https://machinelearningmastery.com/robust-regression-for-machine-learning-in-python/ #model = LinearRegression() #model = HuberRegressor() #model = RANSACRegressor() model = TheilSenRegressor() results = evaluate_model(x, y, model) a,b,err = plot_best_fit(x, y, model) # disallow positive slopes: if(a > 0.0): a = 0.0 b = np.mean(y) if(plotme): plot_ab(x, y, a, b, err, title="REGRESSION rev {}".format(rev)) else: c = 1 a = 0.0 try: filtered_data = sigma_clip(y, sigma=sigma, maxiters=10, return_bounds=True) filtered_y = filtered_data[0] filtered_min = filtered_data[1] filtered_max = filtered_data[2] b = np.mean(filtered_y) for s in scw[y > filtered_max]: ignored_scw.append(s.decode('UTF-8')) for s in scw[y < filtered_min]: ignored_scw.append(s.decode('UTF-8')) except: b = np.mean(y) print("case 1: mean {:6f}, normal mean {:6f}".format(b,np.mean(y))) err = np.sqrt(np.sum((y-b)**2))/len(y) if(plotme): plot_ab(x, y, a, b, err, title="Case 1, MEAN rev {}".format(rev)) elif(nobs > nmin and nobs < nmax): a = 0.0 c = 2 try: filtered_data = sigma_clip(y, sigma=sigma, maxiters=10, return_bounds=True) filtered_y = filtered_data[0] filtered_min = filtered_data[1] filtered_max = filtered_data[2] b = np.mean(filtered_y) for s in scw[y > filtered_max]: ignored_scw.append(s.decode('UTF-8')) for s in scw[y < filtered_min]: ignored_scw.append(s.decode('UTF-8')) except: b = np.mean(y) print("case 2: mean {:6f}, normal mean {:.6f}".format(b,np.mean(y))) err = np.sqrt(np.sum((y-b)**2))/len(y) if(plotme): plot_ab(x, y, a, b, err, title="Case 2, MEAN rev {}".format(rev)) else: # don't consider low number of ScWs a=-1 b=-1 err=-1 continue if(a > amin and a < amax): bgdmodel[rev]={'a':a, 'b':b, 'err':err, 'c':c, 'r1':rev, 'r2':rev} else: ignored_rev.append(rev) print("Revs: {} Total obs.: {}".format(nrev,ntotal)) keys = list(bgdmodel.keys()) for rev in range(revmin,revmax): if not (rev in keys): left,right = find_nearest(keys, rev) interp_a = np.interp(rev, [left,right], [bgdmodel[left]['a'], bgdmodel[right]['a']]) interp_b = np.interp(rev, [left,right], [bgdmodel[left]['b'], bgdmodel[right]['b']]) interp_err = np.interp(rev, [left,right], [bgdmodel[left]['err'], bgdmodel[right]['err']]) interp_c = np.interp(rev, [left,right], [bgdmodel[left]['c'], bgdmodel[right]['c']]) print() print(rev, left, right) print(rev, 'a', bgdmodel[left]['a'], interp_a, bgdmodel[right]['a']) print(rev, 'b', bgdmodel[left]['b'], interp_b, bgdmodel[right]['b']) print(rev, 'c', bgdmodel[left]['c'], interp_c, bgdmodel[right]['c']) #print(rev, interp_a, interp_b, interp_c, interp_err) bgdmodel[rev]={'a':interp_a, 'b':interp_b, 'err':interp_err, 'c':interp_c, 'r1':left, 'r2':right} with open(proddir+fn.replace(".fits",".pkl"), 'wb') as fp: pickle.dump(bgdmodel, fp, protocol=pickle.HIGHEST_PROTOCOL) with open(proddir+fn.replace(".fits",".ignored_scw.pkl"), 'wb') as fp: pickle.dump(ignored_scw, fp, protocol=pickle.HIGHEST_PROTOCOL) print("Removed ScWs:",ignored_scw) with open(proddir+fn.replace(".fits",".ignored_rev.pkl"), 'wb') as fp: pickle.dump(ignored_rev, fp, protocol=pickle.HIGHEST_PROTOCOL) print("Removed REVs:",ignored_rev)