ridge/scripts/01_bgdmodel.py
2024-10-31 13:56:16 +03:00

169 lines
5.7 KiB
Python
Executable File

#!/usr/bin/env python
__author__ = "Roman Krivonos"
__copyright__ = "Space Research Institute (IKI)"
import numpy as np
import pandas as pd
from astropy.io import fits
import matplotlib.pyplot as plt
import math, sys, os
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
#from statsmodels.robust.scale import huber
from astropy.stats import sigma_clip
from numpy import absolute
from numpy import arange
from ridge.utils import *
from ridge.config import *
enkey = sys.argv[1]
fn="detcnts.{}.fits".format(enkey)
print("Reading {}".format(datadir+fn))
with fits.open(datadir+fn) as data:
df = pd.DataFrame(data[1].data)
# print(df.columns)
plotme=False
ntotal=0
nrev=0
bgdmodel={}
# will be populated
ignored_scw=[]
if not os.path.exists(proddir):
os.makedirs(proddir)
for rev in range(revmin,revmax):
df0 = df.query('CLEAN > 0.0 & REV == {} & ( abs(LAT) > {} | abs(LON) > {}) & PHASE > {} & PHASE < {}'.format(rev,bmax,lmax,phmin,phmax))
nobs=len(df0)
print("*** REV {} *** {} ScWs".format(rev,df0.shape[0]))
if not(nobs):
continue
ntotal=ntotal+nobs
nrev=nrev+1
phase_diff0=max(df0['PHASE'])-min(df0['PHASE'])
x = np.array(df0['PHASE'].values)
y = np.array(df0['CLEAN'].values)
scw = np.array(df0['OBSID'].values)
if(nobs >= nmax):
print("Phase diff: {:.2f} max allowed: {:.2f}".format(phase_diff0, phase_diff))
if(phase_diff0 > phase_diff):
c = 0
""" run regression """
print("*** Run regression for {}".format(rev))
x = x.reshape((-1, 1))
# https://machinelearningmastery.com/robust-regression-for-machine-learning-in-python/
#model = LinearRegression()
#model = HuberRegressor()
#model = RANSACRegressor()
model = TheilSenRegressor()
results = evaluate_model(x, y, model)
a,b,err = plot_best_fit(x, y, model)
# disallow positive slopes:
if(a > 0.0):
a = 0.0
b = np.mean(y)
if(plotme):
plot_ab(x, y, a, b, err, title="REGRESSION rev {}".format(rev))
else:
c = 1
a = 0.0
try:
filtered_data = sigma_clip(y, sigma=sigma, maxiters=10, return_bounds=True)
filtered_y = filtered_data[0]
filtered_min = filtered_data[1]
filtered_max = filtered_data[2]
b = np.mean(filtered_y)
for s in scw[y > filtered_max]:
ignored_scw.append(s.decode('UTF-8'))
for s in scw[y < filtered_min]:
ignored_scw.append(s.decode('UTF-8'))
except:
b = np.mean(y)
print("case 1: mean {:6f}, normal mean {:6f}".format(b,np.mean(y)))
err = np.sqrt(np.sum((y-b)**2))/len(y)
if(plotme):
plot_ab(x, y, a, b, err, title="Case 1, MEAN rev {}".format(rev))
elif(nobs > nmin and nobs < nmax):
a = 0.0
c = 2
try:
filtered_data = sigma_clip(y, sigma=sigma, maxiters=10, return_bounds=True)
filtered_y = filtered_data[0]
filtered_min = filtered_data[1]
filtered_max = filtered_data[2]
b = np.mean(filtered_y)
for s in scw[y > filtered_max]:
ignored_scw.append(s.decode('UTF-8'))
for s in scw[y < filtered_min]:
ignored_scw.append(s.decode('UTF-8'))
except:
b = np.mean(y)
print("case 2: mean {:6f}, normal mean {:.6f}".format(b,np.mean(y)))
err = np.sqrt(np.sum((y-b)**2))/len(y)
if(plotme):
plot_ab(x, y, a, b, err, title="Case 2, MEAN rev {}".format(rev))
else:
# don't consider low number of ScWs
a=-1
b=-1
err=-1
continue
if(a > amin and a < amax):
bgdmodel[rev]={'a':a, 'b':b, 'err':err, 'c':c, 'r1':rev, 'r2':rev}
else:
ignored_rev.append(rev)
print("Revs: {} Total obs.: {}".format(nrev,ntotal))
keys = list(bgdmodel.keys())
for rev in range(revmin,revmax):
if not (rev in keys):
left,right = find_nearest(keys, rev)
interp_a = np.interp(rev, [left,right], [bgdmodel[left]['a'], bgdmodel[right]['a']])
interp_b = np.interp(rev, [left,right], [bgdmodel[left]['b'], bgdmodel[right]['b']])
interp_err = np.interp(rev, [left,right], [bgdmodel[left]['err'], bgdmodel[right]['err']])
interp_c = np.interp(rev, [left,right], [bgdmodel[left]['c'], bgdmodel[right]['c']])
print()
print(rev, left, right)
print(rev, 'a', bgdmodel[left]['a'], interp_a, bgdmodel[right]['a'])
print(rev, 'b', bgdmodel[left]['b'], interp_b, bgdmodel[right]['b'])
print(rev, 'c', bgdmodel[left]['c'], interp_c, bgdmodel[right]['c'])
#print(rev, interp_a, interp_b, interp_c, interp_err)
bgdmodel[rev]={'a':interp_a, 'b':interp_b, 'err':interp_err, 'c':interp_c, 'r1':left, 'r2':right}
with open(proddir+fn.replace(".fits",".pkl"), 'wb') as fp:
pickle.dump(bgdmodel, fp, protocol=pickle.HIGHEST_PROTOCOL)
with open(proddir+fn.replace(".fits",".ignored_scw.pkl"), 'wb') as fp:
pickle.dump(ignored_scw, fp, protocol=pickle.HIGHEST_PROTOCOL)
print("Removed ScWs:",ignored_scw)
with open(proddir+fn.replace(".fits",".ignored_rev.pkl"), 'wb') as fp:
pickle.dump(ignored_rev, fp, protocol=pickle.HIGHEST_PROTOCOL)
print("Removed REVs:",ignored_rev)