generated from erosita/uds
169 lines
5.7 KiB
Python
Executable File
169 lines
5.7 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
__author__ = "Roman Krivonos"
|
|
__copyright__ = "Space Research Institute (IKI)"
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from astropy.io import fits
|
|
import matplotlib.pyplot as plt
|
|
import math, sys, os
|
|
import pickle
|
|
|
|
from sklearn.linear_model import LinearRegression
|
|
from sklearn.linear_model import HuberRegressor
|
|
from sklearn.linear_model import RANSACRegressor
|
|
from sklearn.linear_model import TheilSenRegressor
|
|
from sklearn.model_selection import cross_val_score
|
|
from sklearn.model_selection import RepeatedKFold
|
|
|
|
#from statsmodels.robust.scale import huber
|
|
from astropy.stats import sigma_clip
|
|
from numpy import absolute
|
|
from numpy import arange
|
|
|
|
from ridge.utils import *
|
|
from ridge.config import *
|
|
|
|
enkey = sys.argv[1]
|
|
|
|
fn="detcnts.{}.fits".format(enkey)
|
|
print("Reading {}".format(datadir+fn))
|
|
|
|
with fits.open(datadir+fn) as data:
|
|
df = pd.DataFrame(data[1].data)
|
|
|
|
# print(df.columns)
|
|
|
|
plotme=False
|
|
ntotal=0
|
|
nrev=0
|
|
bgdmodel={}
|
|
|
|
# will be populated
|
|
ignored_scw=[]
|
|
|
|
|
|
if not os.path.exists(proddir):
|
|
os.makedirs(proddir)
|
|
|
|
for rev in range(revmin,revmax):
|
|
|
|
df0 = df.query('CLEAN > 0.0 & REV == {} & ( abs(LAT) > {} | abs(LON) > {}) & PHASE > {} & PHASE < {}'.format(rev,bmax,lmax,phmin,phmax))
|
|
|
|
nobs=len(df0)
|
|
print("*** REV {} *** {} ScWs".format(rev,df0.shape[0]))
|
|
if not(nobs):
|
|
continue
|
|
ntotal=ntotal+nobs
|
|
nrev=nrev+1
|
|
phase_diff0=max(df0['PHASE'])-min(df0['PHASE'])
|
|
x = np.array(df0['PHASE'].values)
|
|
y = np.array(df0['CLEAN'].values)
|
|
scw = np.array(df0['OBSID'].values)
|
|
|
|
if(nobs >= nmax):
|
|
print("Phase diff: {:.2f} max allowed: {:.2f}".format(phase_diff0, phase_diff))
|
|
if(phase_diff0 > phase_diff):
|
|
c = 0
|
|
""" run regression """
|
|
print("*** Run regression for {}".format(rev))
|
|
|
|
x = x.reshape((-1, 1))
|
|
# https://machinelearningmastery.com/robust-regression-for-machine-learning-in-python/
|
|
#model = LinearRegression()
|
|
#model = HuberRegressor()
|
|
#model = RANSACRegressor()
|
|
model = TheilSenRegressor()
|
|
results = evaluate_model(x, y, model)
|
|
a,b,err = plot_best_fit(x, y, model)
|
|
|
|
# disallow positive slopes:
|
|
if(a > 0.0):
|
|
a = 0.0
|
|
b = np.mean(y)
|
|
|
|
if(plotme):
|
|
plot_ab(x, y, a, b, err, title="REGRESSION rev {}".format(rev))
|
|
else:
|
|
c = 1
|
|
a = 0.0
|
|
try:
|
|
filtered_data = sigma_clip(y, sigma=sigma, maxiters=10, return_bounds=True)
|
|
filtered_y = filtered_data[0]
|
|
filtered_min = filtered_data[1]
|
|
filtered_max = filtered_data[2]
|
|
b = np.mean(filtered_y)
|
|
for s in scw[y > filtered_max]:
|
|
ignored_scw.append(s.decode('UTF-8'))
|
|
for s in scw[y < filtered_min]:
|
|
ignored_scw.append(s.decode('UTF-8'))
|
|
|
|
except:
|
|
b = np.mean(y)
|
|
print("case 1: mean {:6f}, normal mean {:6f}".format(b,np.mean(y)))
|
|
err = np.sqrt(np.sum((y-b)**2))/len(y)
|
|
if(plotme):
|
|
plot_ab(x, y, a, b, err, title="Case 1, MEAN rev {}".format(rev))
|
|
|
|
elif(nobs > nmin and nobs < nmax):
|
|
a = 0.0
|
|
c = 2
|
|
try:
|
|
filtered_data = sigma_clip(y, sigma=sigma, maxiters=10, return_bounds=True)
|
|
filtered_y = filtered_data[0]
|
|
filtered_min = filtered_data[1]
|
|
filtered_max = filtered_data[2]
|
|
b = np.mean(filtered_y)
|
|
for s in scw[y > filtered_max]:
|
|
ignored_scw.append(s.decode('UTF-8'))
|
|
for s in scw[y < filtered_min]:
|
|
ignored_scw.append(s.decode('UTF-8'))
|
|
except:
|
|
b = np.mean(y)
|
|
print("case 2: mean {:6f}, normal mean {:.6f}".format(b,np.mean(y)))
|
|
err = np.sqrt(np.sum((y-b)**2))/len(y)
|
|
if(plotme):
|
|
plot_ab(x, y, a, b, err, title="Case 2, MEAN rev {}".format(rev))
|
|
else:
|
|
# don't consider low number of ScWs
|
|
a=-1
|
|
b=-1
|
|
err=-1
|
|
continue
|
|
|
|
if(a > amin and a < amax):
|
|
bgdmodel[rev]={'a':a, 'b':b, 'err':err, 'c':c, 'r1':rev, 'r2':rev}
|
|
else:
|
|
ignored_rev.append(rev)
|
|
|
|
print("Revs: {} Total obs.: {}".format(nrev,ntotal))
|
|
|
|
keys = list(bgdmodel.keys())
|
|
|
|
for rev in range(revmin,revmax):
|
|
if not (rev in keys):
|
|
left,right = find_nearest(keys, rev)
|
|
interp_a = np.interp(rev, [left,right], [bgdmodel[left]['a'], bgdmodel[right]['a']])
|
|
interp_b = np.interp(rev, [left,right], [bgdmodel[left]['b'], bgdmodel[right]['b']])
|
|
interp_err = np.interp(rev, [left,right], [bgdmodel[left]['err'], bgdmodel[right]['err']])
|
|
interp_c = np.interp(rev, [left,right], [bgdmodel[left]['c'], bgdmodel[right]['c']])
|
|
print()
|
|
print(rev, left, right)
|
|
print(rev, 'a', bgdmodel[left]['a'], interp_a, bgdmodel[right]['a'])
|
|
print(rev, 'b', bgdmodel[left]['b'], interp_b, bgdmodel[right]['b'])
|
|
print(rev, 'c', bgdmodel[left]['c'], interp_c, bgdmodel[right]['c'])
|
|
#print(rev, interp_a, interp_b, interp_c, interp_err)
|
|
bgdmodel[rev]={'a':interp_a, 'b':interp_b, 'err':interp_err, 'c':interp_c, 'r1':left, 'r2':right}
|
|
|
|
with open(proddir+fn.replace(".fits",".pkl"), 'wb') as fp:
|
|
pickle.dump(bgdmodel, fp, protocol=pickle.HIGHEST_PROTOCOL)
|
|
|
|
with open(proddir+fn.replace(".fits",".ignored_scw.pkl"), 'wb') as fp:
|
|
pickle.dump(ignored_scw, fp, protocol=pickle.HIGHEST_PROTOCOL)
|
|
print("Removed ScWs:",ignored_scw)
|
|
|
|
with open(proddir+fn.replace(".fits",".ignored_rev.pkl"), 'wb') as fp:
|
|
pickle.dump(ignored_rev, fp, protocol=pickle.HIGHEST_PROTOCOL)
|
|
print("Removed REVs:",ignored_rev)
|