# srg/srglib/genericsourceutils.py
from astropy.coordinates import SkyCoord # High-level coordinates
from astropy.coordinates import ICRS, Galactic, FK4, FK5 # Low-level frames
from astropy.coordinates import Angle, Latitude, Longitude # Angles
from ErositaDailySourceCatalogues.models import ErositaDailyCatalog, ErositaSource
from genericsource.models import GenericSource, GenericConnection, GenericCatalog, SrcAuxDATA
import pandas as pd
from astropy.io import fits
from astropy.time import Time, TimeDelta, TimezoneInfo, TimeFromEpoch
from astropy_healpix import HEALPix, neighbours
from astropy import units as au
from django_pandas.io import read_frame
from srglib import correlate_utils, check_transient
from srglib.utils import make_source_name
import numpy as np
import re
import glob
import pickle
from math import pi, cos, sin
from scipy.sparse import coo_matrix
from erosurvey.models import NSIDE_PLATES, ORDER_PLATES
from heasarc.models import NSIDE_SOURCES, ORDER, HeasarcBase, GreenSNR, RosatExpMap
from auxcatalogues.models import SimbadSource, RXS2, SXPS2, CSC2, \
XMM3DR8, ZTFSource, SDSSSpec, GAIASource, ZTFOid, ZTFlc, AllWISEgs, \
NVSSgs
from django.core.exceptions import ObjectDoesNotExist
from django.db.models.functions.math import Sqrt, Cos, Power, Abs
from django.db.models import F, Q, Max, Subquery, OuterRef, Min, Count, Value
from django.contrib.contenttypes.models import ContentType
import datetime
from time import ctime
import pytz
import subprocess
#xspec = {}
"""
import xspec
from gdpyc import GasMap
from plotly import graph_objects as go
"""
#==============================================================
import os
os.environ["HOME"] = "/tmp/djangoastropy/"
os.makedirs("/tmp/djangoastropy/.astropy", exist_ok=True)
from astroquery.simbad import Simbad as aSimbad
from astroquery.vizier import Vizier
import logging
import requests
import json
import pprint
import pandas
MJDREF = 51543.875
TZ_UTC = TimezoneInfo(utc_offset=0*au.hour)
TZ_MSK = TimezoneInfo(utc_offset=3*au.hour)
TIMESTR_FORMAT = '%Y.%m.%d %H:%M:%S'
xraycats = [ContentType.objects.get_for_model(m) for m in [RXS2, CSC2, XMM3DR8, SXPS2]] #, ErositaSource]]
hp = HEALPix(nside=NSIDE_SOURCES, order=ORDER, frame=FK5())
hp_plates = HEALPix(nside=NSIDE_PLATES,
order=ORDER_PLATES, frame=FK5())
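
# A minimal sketch (illustrative, not part of the pipeline): the healpix
# indices computed with `hp` above are what the cross-match helpers below
# use to prefilter counterpart catalogs to the neighbourhood of a source.
def _example_healpix_index(ra_deg=123.4, dec_deg=-5.6):
    sc = SkyCoord(ra_deg, dec_deg, unit="deg", frame="fk5")
    return hp.skycoord_to_healpix(sc)
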
def mission2date(timesec):
    """Convert mission time (seconds since MJDREF) to an MSK datetime."""
    mjdref = Time(MJDREF, format='mjd')
    delta = TimeDelta(timesec, format='sec')
    dtime = mjdref + delta + 3*au.hour
    return dtime.to_datetime(timezone=TZ_MSK)

def str2date(timestr, tzone):
    """Parse a TIMESTR_FORMAT string into a timezone-aware datetime."""
    time_raw = datetime.datetime.strptime(timestr, TIMESTR_FORMAT)
    time_zoned = time_raw.replace(tzinfo=tzone)
    return time_zoned

def date2mission(dtime):
    """Convert a datetime back to mission time (seconds since MJDREF)."""
    mjdref = Time(MJDREF, format='mjd')
    dtime = Time(dtime, format='datetime')
    return (dtime - mjdref).sec
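
# Hedged example: an "eroday" is 14400 s of mission time (see
# load_erosita_daily_source below); this helper, purely illustrative,
# converts an eroday boundary to the MSK datetime used throughout.
def _example_eroday_to_datetime(eroday=1000):
    return mission2date(eroday * 14400)
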
def check_catsources_transient(cat):
check_sources_transient(cat.erotranssource_set.all())
cat.ntransients = cat.erotranssource_set.filter(Q(transient=True) | Q(rosat_transient=True)).count()
cat.save()
from astrobasis.models import GAIADR2 #container of ALL gaia sources DR2
def find_from_old_gaia_db(srcquery, neighsearch=1, **kwargs):
"""
for all sources in srcquery, which is assumed to have many-to-many relationtip to counterpatscat catalog,
performs search of sources in counterpart catalog within max(minrad[=30], 3*sqrt(src.poserr**2 + counterpart.poserr**2))
Parameters:
----------
srcquery: django.srcquery - iterable providing access to catalog under consideration
counterpartscat: catalog to search counterparts in
counterpartname: name of the many-to-many relationtip field in srcquery
**kwargs: additional arguments provided to makeposerrdist functions (see doc within)
"""
scat = read_frame(srcquery)
uhealpix = np.unique(scat.healpix.values)
uhealpix = np.unique(np.concatenate([uhealpix, neighbours(uhealpix, NSIDE_SOURCES, order=ORDER).ravel()]))
ccat = read_frame(GAIADR2.objects.filter(healpix__in=uhealpix))
ccat.drop(columns=["filename", "id", "ra_error", "dec_error"], inplace=True)
if scat.index.size == 0 or ccat.index.size == 0:
return 0
gexist = GAIASource.objects.filter(healpix__in=uhealpix)
find_counterparts(srcquery, gexist, neighsearch, **kwargs)
    mask = np.isin(ccat.source_id.values, gexist.values_list("source_id", flat=True), invert=True)
ccat = ccat[mask]
ccat.error_radius.fillna(0., inplace=True)
dmat = correlate_utils.makeposerrdist(scat, ccat, **kwargs)
print(dmat.data)
uidx, ucts = np.unique(dmat.row, return_counts=True)
    cs = np.empty(ucts.size + 1, int)
cs[0] = 0
cs[1:] = ucts.cumsum()
ce = np.lib.stride_tricks.as_strided(cs, (cs.size - 1, 2), cs.strides*2)
glist = []
gnewlist = []
gaiacat = GenericCatalog.objects.get(name="GAIA DR2")
for sid, cid, d in zip(scat.iloc[dmat.row].id.values, dmat.col, dmat.data):
src1 = srcquery.get(id=sid)
src2 = GAIASource.objects.create(**ccat.iloc[cid])
gaiacat.genericsource_set.add(src2)
glist.append(GenericConnection(connected=src1, connectto=src2, separation=d))
glist.append(GenericConnection(connected=src2, connectto=src1, separation=d))
GenericConnection.objects.bulk_create(glist, ignore_conflicts=True)
def update_catalog_connections(newsrc, cat):
    """Attach counterpart connections and transient flags to newsrc sources.

    cat is the GenericCatalog whose stop time bounds the self-match against
    earlier eRosita detections below.
    """
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=CSC2), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SXPS2), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=XMM3DR8), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=RXS2), minrad=20., maxdist=80.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SDSSSpec), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(Q(instance_of=ErositaSource) & Q(erositasource__obsdate__lt=cat.stop - datetime.timedelta(days=10))), minrad=3, maxdist=20)
load_simbad_auxcat(newsrc)
load_ztf_auxcat(newsrc, maxdist=10, minrad=3)
load_allwise_auxcat(newsrc, maxdist=10, minrad=3)
load_NVSS_auxcat(newsrc, maxdist=10, minrad=3)
newsrc = newsrc.annotate(xct=Count("connections", filter=Q(connections__polymorphic_ctype__in=xraycats)))
hasxcounterparts = newsrc.filter(xct__gt=0)
hasxcounterparts.annotate(mcxflux=Max("connections__sxflux")).filter(sxflux__gt=F("mcxflux")*Value(10.)).update(transient=True)
GenericCatalog.objects.get(name="Erosita flux transients").genericsource_set.add(*hasxcounterparts)
hasnoxcounterparts = newsrc.filter(xct=0).annotate(rassexp=Subquery(RosatExpMap.objects.filter(healpix=OuterRef("healpix")).values("exposure")[:1]))
hasnoxcounterparts = hasnoxcounterparts.annotate(rasslim=Value(4.*5.76*0.89211e-12)*Power(Value(1.) + Power(F("rassexp")/Value(425), Value(4.)), Value(0.15))/F("rassexp"))
hasnoxcounterparts.filter(rassexp__gt=0, sxflux__gt=F("rasslim")*Value(10.)).update(rosat_transient=True)
def read_trans_pickle(filename):
data = pandas.read_pickle(filename)
data.rename(columns={"RA":"ra", "DEC":"dec", "L":"lii", "B":"bii", "CTS":"ML_CTS_0", "LIKE": "DET_LIKE_0",}, inplace=True)
sc = SkyCoord(data.ra.values, data.dec.values, unit="deg", frame="fk5")
hp = HEALPix(nside=NSIDE_SOURCES, order=ORDER, frame=FK5())
hp_plate = HEALPix(nside=NSIDE_PLATES,
order=ORDER_PLATES,
frame=FK5())
data["healpix"] = hp.skycoord_to_healpix(sc)
data["healpix_plate"] = hp_plate.skycoord_to_healpix(sc)
data["name"] = [make_source_name("SRGe", r, d) for r, d in zip(data.ra.values, data.dec.values)]
return data.drop(columns=["LIKE_REF", "REF_FLUX", "D2D", "LIKE_REF", "ID_REF", "EXT_REF", \
"CTS_REF", "EXP", "EXP_REF", "FLAG", "G_NH", "g_d2d", "g_s", \
"g_id", "g_ra", "g_dec", "s_d2d", "s_id", "s_z", "s_otype", \
"sdss_d2d", "sdss_id", "sdss_z", "FLUX", "ID_SRC", "RATIO"])
def read_esrc_pack(filename):
    # file creation time at second precision, used as the catalog version stamp
    ftime = datetime.datetime.fromtimestamp(os.path.getctime(filename)).replace(microsecond=0)
df = pandas.read_pickle(filename)
df.rename(columns={"RADEC_ERR": "error_radius",
"srcID": "srcid", "ML_EXP_1":'APE_EXP_1'}, inplace=True)
if "RA_fin" in df.columns.values:
df.rename(columns={"RA_fin":"ra", "DEC_fin":"dec"}, inplace=True)
elif "RA_corr" in df.columns.values:
df.rename(columns={"RA_corr":"ra", "DEC_corr":"dec"}, inplace=True)
else:
df.rename(columns={"RA":"ra", "DEC":"dec"}, inplace=True)
sc = SkyCoord(df.ra.values, df.dec.values, unit="deg", frame="fk5")
hp = HEALPix(nside=NSIDE_SOURCES, order=ORDER, frame=FK5())
hp_plate = HEALPix(nside=NSIDE_PLATES,
order=ORDER_PLATES,
frame=FK5())
df["lii"] = sc.galactic.l.value
df["bii"] = sc.galactic.b.value
df["healpix"] = hp.skycoord_to_healpix(sc)
df["healpix_plate"] = hp_plate.skycoord_to_healpix(sc)
df["name"] = [make_source_name("SRGe", r, d) for r, d in zip(df.ra.values, df.dec.values)]
df["id"] = df.index.values
df["sxflux"] = df.ML_FLUX_0.values
df["obsdate"] = (Time(51543.87, format="mjd") + TimeDelta((df.TSTART.values + df.TSTOP.values)/2., format="sec")).to_datetime(timezone=pytz.UTC) #datetime.timezone.utc)
df["tstart"] = (Time(51543.87, format="mjd") + TimeDelta(df.TSTART.values, format="sec")).to_datetime(timezone=pytz.UTC)
df["tstop"] = (Time(51543.87, format="mjd") + TimeDelta(df.TSTOP.values, format="sec")).to_datetime(timezone=pytz.UTC)
df["detected"] = True
return df.drop(columns=df.columns.difference([f.name for f in ErositaSource._meta.get_fields()] + ["id",])), ftime
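
# Hedged usage sketch (the path is hypothetical): read_esrc_pack returns a
# dataframe trimmed to ErositaSource model fields plus "id", together with
# the file creation time used as the catalog version stamp:
# df, ftime = read_esrc_pack("/data/erosita/transients/esrc_pack_e2.pkl")
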
def setup_xspec(xspec):
"""Setup xspec parameters."""
xspec.Fit.statMethod = 'cstat'
xspec.Fit.query = 'yes'
xspec.Fit.nIterations = 100
xspec.Xset.abund = 'wilm'
xspec.Plot.area = True
xspec.Plot.xLog = True
xspec.Plot.yLog = True
xspec.Plot.xAxis = 'keV'
xspec.Plot.setRebin(1.5, 20)
def fit_powerlaw_v2(fname, nH=0.006, G=2.0, norm=1e-4, fit_nH=True, pdfname=None):
    import xspec  # lazy import: PyXspec is only needed when fitting
    # Initialization
    dirpath = os.path.dirname(fname)
    cwd = os.getcwd()
    os.chdir(dirpath if dirpath else "./")
    #xspec.Xset.openLog(fname[:-4] + "log")
    xspec.Xset.chatter = 0
    xspec.AllData.clear()
    xspec.AllModels.clear()
    xspec.AllChains.clear()
# Fitting
sp = xspec.AllData(fname)
xspec.AllData.ignore('**-0.3 8.-**')
xspec.AllData.ignore('bad')
model = xspec.Model('tbabs*pow')
model.TBabs.nH.values = [nH, 0.001, nH, nH, 20, 20]
model.powerlaw.PhoIndex.values = [G, 0.01, 0.1, 0.1, 10, 10]
model.powerlaw.norm.values = [norm, 1e-6, 1e-10, 1e-10, 100, 100]
# first, fit the normalization
model.TBabs.nH.frozen = True
model.powerlaw.PhoIndex.frozen = True
xspec.Fit.perform()
model.TBabs.nH.frozen = False
model.powerlaw.PhoIndex.frozen = False
if not fit_nH:
model.TBabs.nH.frozen = True
    # then make a full fit
xspec.Fit.perform()
xspec.Fit.error('stop 1000,1.,2')
xspec.Fit.perform()
xspec.Fit.error('stop 1000,1.,2')
xspec.Fit.perform()
if fit_nH:
xspec.Fit.error('stop 1000,1.,1')
xspec.Fit.perform()
#xspec.Fit.error('1. 1-3')
#bestfit = XContainer(srcID)
#bestfit.get_session()
#xspec.Xset.closeLog()
    xspec.Plot.device = "/null"
    xspec.Plot("eeuf")
    x, y, xe, ye, mo = xspec.Plot.x(), xspec.Plot.y(), xspec.Plot.xErr(), xspec.Plot.yErr(), xspec.Plot.model()
    os.chdir(cwd)
    return model, np.array([x, y, xe, ye, mo])
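
# Hedged usage sketch, assuming PyXspec is installed and `specfile` is a
# grouped PHA file with its response/ARF/background keywords set:
def _example_powerlaw_fit(specfile="src_spec_r5.fits"):
    import xspec
    setup_xspec(xspec)
    model, pdata = fit_powerlaw_v2(specfile, nH=0.01, G=2.0, fit_nH=False)
    return model.powerlaw.PhoIndex.values[0], pdata
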
def add_aux_data(srcid, survey, coord=None, newsourceid=None, xspec=None):
    if xspec is None:
        import xspec
    from gdpyc import GasMap  # used below for the HI4PI nH estimate
    from plotly import graph_objects as go  # used below to render spec.html
specfile = glob.glob("/data/erosita/transients/products/src_%d_020_SourceSpec_00001_e%d.fits*" % (srcid, survey))
bkgfile = glob.glob("/data/erosita/transients/products/src_%d_020_BackgrSpec_00001_e%d.fits*" % (srcid, survey))
arffile = glob.glob("/data/erosita/transients/products/src_%d_020_ARF_00001_e%d.fits*" % (srcid, survey))
rmffile = glob.glob("/data/erosita/transients/products/src_%d_020_RMF_00001_e%d.fits*" % (srcid, survey))
specimg = glob.glob("/data/erosita/transients/products/src_%d_020_SourceSpec_00001_e%d.pdf" % (srcid, survey))
phimg = glob.glob("/data/erosita/transients/products/trans_%d_e%d.png" % (srcid, survey))
specmo = glob.glob("/data/erosita/transients/products/src_%d_020_SourceSpec_00001_e%d.pdf" % (srcid, survey))
lcpdf = glob.glob("/data/erosita/transients/products/lc*_%d_e%d.pdf" % (srcid, survey))
res = {}
if specfile: res["spec"] = os.path.basename(specfile[0])
if bkgfile: res["bkg"] = os.path.basename(bkgfile[0])
if arffile: res["arf"] = os.path.basename(arffile[0])
if rmffile: res["rmf"] = os.path.basename(rmffile[0])
if specimg: res["specimg"] = os.path.basename(specimg[0])
if phimg: res["img"] = os.path.basename(phimg[0])
if specmo: res["xcm"] = os.path.basename(specmo[0])
if lcpdf: res["lcpdf"] = os.path.basename(lcpdf[0])
print(res, newsourceid)
cwd = os.getcwd()
os.chdir("/data/erosita/transients/products")
rname = "" if not "spec" in res else res["spec"]
if newsourceid:
dpath = "/data/products/erotrans/%d" % newsourceid
ppath = "/data/erosita/transients/products/"
if not os.path.exists(dpath):
os.mkdir(dpath)
if "spec" in res: subprocess.run(["cp", os.path.join(ppath, res["spec"]), os.path.join(dpath, res["spec"])])
if "bkg" in res: subprocess.run(["cp", os.path.join(ppath, res["bkg"]), os.path.join(dpath, res["bkg"])])
if "arf" in res: subprocess.run(["cp", os.path.join(ppath, res["arf"]), os.path.join(dpath, res["arf"])])
if "rmf" in res: subprocess.run(["cp", os.path.join(ppath, res["rmf"]), os.path.join(dpath, res["rmf"])])
if "specimg" in res: subprocess.run(["cp", os.path.join(ppath, res["specimg"]), os.path.join(dpath, res["specimg"])])
if "img" in res: subprocess.run(["cp", os.path.join(ppath, res["img"]), os.path.join(dpath, res["img"])])
if "xcm" in res: subprocess.run(["cp", os.path.join(ppath, res["xcm"]), os.path.join(dpath, res["xcm"])])
if "lcpdf" in res: subprocess.run(["cp", os.path.join(ppath, res["lcpdf"]), os.path.join(dpath, res["lcpdf"])])
os.chdir(dpath)
        if "spec" in res:
            rname = res["spec"][:-5] + "_r5.fits"
            # rebin the source spectrum to a minimum of 5 counts per channel
            subprocess.run(["grppha", res["spec"], rname], input=b"group min 5 & exit\n")
        cts = 0 if "spec" not in res else fits.getheader(res["spec"], 1)["CTS"]
        try:
            if cts > 20 or coord is None:
                model, pdata = fit_powerlaw_v2(rname)
            else:
                # freeze nH at the HI4PI column density toward the source
                model, pdata = fit_powerlaw_v2(rname, fit_nH=False, nH=GasMap.nh(coord, nhmap="HI4PI", hires=True).value/1e22)
        except Exception as exc:
            print("failed to fit:", exc)
else:
res["gamma"] = model.powerlaw.PhoIndex.values[0]
res["nh"] = model.TBabs.nH.values[0]*1e21
res["norm"] = model.powerlaw.norm.values[0]
res["gammael"] = model.powerlaw.PhoIndex.error[0]
res["gammaeu"] = model.powerlaw.PhoIndex.error[1]
pickle.dump(pdata, open(os.path.join(dpath, 'pdata.pickle'), 'wb'))
x, y, xe, ye, mo = pdata
            fig = go.Figure(data=[go.Scatter(x=x, y=y, error_x={"type": "data", "array": xe}, error_y={"type": "data", "array": ye}, mode="markers"),
                                  go.Scatter(x=x, y=mo, mode="lines")], layout=dict(width=1000))
            fig.update_xaxes(type="log")
            fig.update_yaxes(type="log")
            with open(os.path.join(dpath, "spec.html"), "w") as fout:
                fout.write(fig.to_html(full_html=False))
    os.chdir(cwd)
    return res
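
# Hedged usage sketch (all identifiers hypothetical): collect the spectral
# products of transient 4242 from survey 2 and attach a power-law fit.
# res = add_aux_data(4242, survey=2, coord=SkyCoord(123.4, -5.6, unit="deg"),
#                    newsourceid=17)
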
def load_erosita_trans_daily(filename, survey, apps=5, load_cat="Erosita surveys transients"):
ncat, ftime = read_esrc_pack(filename)
ncat["survey"] = survey
return load_erosita_trans_daily_cat(ncat, ftime, apps, load_cat)
def load_erosita_trans_daily_cat(ncat, ftime, apps=5, load_cat="Erosita surveys transients"):
    """
    Load a transients catalog; a newer file format contains only the new
    transients found between the first and second surveys.
    """
    import xspec
    from gdpyc import GasMap
    from plotly import graph_objects as go
cat = GenericCatalog.objects.get(name = load_cat)
if cat.stop == ftime:
print("latest version already loaded")
return None
catsrc = ErositaSource.objects.filter(id__in=cat.genericsource_set.all())
ecat = read_frame(catsrc)
    urow = np.empty(0, int)
    ucol = np.empty(0, int)
    if ecat.index.size:
        maske = np.ones(ecat.index.size, bool)
        maskn = np.ones(ncat.index.size, bool)
eidx = np.arange(ecat.index.size)
nidx = np.arange(ncat.index.size)
for i in range(21):
if not (np.any(maske) and np.any(maskn)):
break
row, col = correlate_utils.get_closest_unique(ecat[maske], ncat[maskn], apps)
if row.size == 0 or np.all(~maske) or np.all(~maskn):
break
urow = np.concatenate([urow, eidx[maske][row]])
ucol = np.concatenate([ucol, nidx[maskn][col]])
maske[urow] = False
maskn[ucol] = False
if urow.size:
mask = np.array([abs((ecat.iloc[i].obsdate - ncat.iloc[j].obsdate).days) for i, j in zip(urow, ucol)]) < 3
urow, ucol = urow[mask], ucol[mask]
setup_xspec(xspec)
ncatidd = ncat.drop(columns=["id",])
print(urow.size, ncat.index.size)
print("list of existing sources", urow, ucol)
for i, j in zip(urow, ucol):
src = catsrc.filter(id__in=[ecat.iloc[i].id,])
src.update(**ncatidd.iloc[j])
src = src.first()
sc = SkyCoord(src.ra, src.dec, unit="deg", frame="fk5")
print(src.name, ncat.iloc[j].name)
        try:
            auxdata = add_aux_data(ncat.iloc[j].name, src.survey, sc, src.id, xspec)
        except Exception:
            print("warn: no spectrum info for %d found" % ncat.iloc[j].id)
else:
print("auxdata", auxdata)
            try:
                adata = src.srcauxdata
                print(adata)
            except ObjectDoesNotExist:
                print(src)
                print(src.name, src.id)
                adata = SrcAuxDATA.objects.create(src=src)
print("update source", auxdata)
for key, val in auxdata.items():
adata.__setattr__(key, val)
print(key, val, getattr(adata, key))
adata.save()
"""
if urow.size != ecat.index.size:
mask = np.ones(ecat.index.size, np.bool)
mask[urow] = False
catr = catsrc.filter(id__in=ecat[mask].id.values)
cat.genericsource_set.remove(*catr)
GenericCatalog.objects.get(name="Erosita surveys transients, removed").genericsource_set.add(*catr)
"""
if ucol.size < ncat.index.size:
catsrc.filter(srcstatus="new").update(srcstatus="")
idx = np.arange(ncat.index.size)
idx = idx[np.isin(idx, ucol, invert=True, assume_unique=True)]
newsrc = []
for i in idx:
src = ErositaSource.objects.create(**ncatidd.iloc[i], srcstatus="new")
sc = SkyCoord(src.ra, src.dec, unit="deg", frame="fk5")
print("processing", ncat.iloc[i].name, src.id)
        try:
            auxdata = add_aux_data(ncat.iloc[i].name, src.survey, sc, src.id, xspec)
        except Exception:
            print("warn: no spectrum info found")
        else:
            print('create new source', auxdata)
            SrcAuxDATA.objects.create(src=src, **auxdata)
        newsrc.append(src)
cat.genericsource_set.add(*newsrc)
newsrc = cat.genericsource_set.filter(id__in=[src.id for src in newsrc])
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=CSC2), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SXPS2), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=XMM3DR8), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=RXS2), minrad=20., maxdist=80.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SDSSSpec), minrad=3., maxdist=20.)
#find_from_old_gaia_db(newsrc, minrad=5, maxdist=20)
#load_allwise_auxcat(newsrc, maxdist=10, minrad=3)
#find_counterparts(newsrc, GenericSource.objects.filter(instance_of=AllWISEgs), minrad=3., maxdist=20.)
#load_NVSS_auxcat(newsrc, maxdist=10, minrad=3)
#find_counterparts(newsrc, GenericSource.objects.filter(instance_of=NVSSgs), minrad=3., maxdist=20.)
#find_counterparts(newsrc, GenericSource.objects.filter(Q(instance_of=ErositaSource) & Q(erositasource__obsdate__lt=cat.stop - datetime.timedelta(days=10))), minrad=3, maxdist=20)
#load_simbad_auxcat(newsrc)
#find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SimbadSource), minrad=3., maxdist=20.)
#load_ztf_auxcat(newsrc, maxdist=10, minrad=3)
#find_counterparts(newsrc, GenericSource.objects.filter(instance_of=ZTFOid), minrad=3., maxdist=20.)
cat.stop = ftime
cat.save()
return newsrc
def load_erosita_daily_source(filename):
    """Load a catalog of daily eRosita sources into the database.

    This procedure accepts a filename in the format 'catalog_OBSID.pkl',
    where OBSID is an 11-digit identifier. If this OBSID is already
    loaded in the DB, the procedure exits; otherwise the catalog is
    loaded.

    Parameters
    ----------
    filename : str
        Absolute path to the input catalog file

    Returns
    -------
    status : int
        Status code {0: 'Already in DB', ID: 'Object ID or primary key in DB'}

    :Author:
        Andrey Semena <san@cosmos.ru>
    """
#metadata = json.load(open(filename.split(".")[0] + ".json"))
    obsid = re.findall(r"\d+", filename)[0]
    logging.debug("Loading %s, ObsID: %s", filename, obsid)
check_repeat = ErositaDailyCatalog.objects.filter(eroday_start=obsid[:5]).first()
if check_repeat and check_repeat.obsid >= obsid:
print("obsid exists")
logging.info("ObsID %s is already loaded, skip" % obsid)
return 0
logging.info("Loading ObsID %s" % obsid)
hp = HEALPix(nside=NSIDE_SOURCES,
order=ORDER, frame=FK5())
hp_plate = HEALPix(nside=NSIDE_PLATES,
order=ORDER_PLATES,
frame=FK5())
    datafile = filename
    eroday_start = int(obsid[0:5])
    eroday_stop = int(obsid[5:10])
    ts_start = eroday_start*14400
    ts_stop = eroday_stop*14400
    start = mission2date(ts_start)
    stop = mission2date(ts_stop)
    dtime_start = Time(start, format='datetime')
    dtime_stop = Time(stop, format='datetime')
survey = ErositaDailyCatalog(name="ErositaDaily",
obsid=obsid,
eroday_start=eroday_start,
eroday_stop=eroday_stop,
start=start,
stop=stop,
mjd_start=dtime_start.mjd,
mjd_stop=dtime_stop.mjd)
survey.save()
if check_repeat and check_repeat.obsid < obsid:
print("removing older version of the catalog")
logging.info("remove older version of the catalog: %s" % obsid)
check_repeat.delete()
    df = pd.read_pickle(datafile)
df.drop(columns=["tilenum", 'srcname', 'hpidx', "TSTART", "ID_SRC",
"TSTOP",], inplace=True, errors="ignore")
df.rename(columns={"RA":"ra", "DEC":"dec", "RADEC_ERR": "error_radius",
"srcID": "srcid", "ML_EXP_1":'APE_EXP_1'}, inplace=True)
df["flux"] = df.ML_FLUX_0.values*0.8911e-12
df["sxflux"] = df.flux
sc = SkyCoord(df.ra.values, df.dec.values, unit="deg", frame="fk5")
df["lii"] = sc.galactic.l.value
df["bii"] = sc.galactic.b.value
sc = sc[df.lii.values < 180.]
df = df[df.lii.values < 180.]
df["healpix"] = hp.skycoord_to_healpix(sc)
df["name"] = [make_source_name("SRGe", row.ra, row.dec) for row in df[["ra", "dec"]].itertuples()]
df["healpix_plate"] = hp_plate.skycoord_to_healpix(sc)
enew = []
for idx, src in df.iterrows():
enew.append(ErositaSource.objects.create(obsdate=survey.stop, **src))
survey.genericsource_set.add(*enew)
#newsrc = ErositaSource.objects.filter(catalog=survey) #survey.genericsource_set.all()
#newsrc = survey.genericsource_set.all()
newsrc = ErositaSource.objects.filter(id__in=survey.genericsource_set.all())
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=CSC2), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SXPS2), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=XMM3DR8), minrad=3., maxdist=20.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=RXS2), minrad=20., maxdist=80.)
find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SDSSSpec), minrad=3., maxdist=20.)
    find_counterparts(newsrc, GenericSource.objects.filter(Q(instance_of=ErositaSource) & Q(erositasource__obsdate__lt=survey.stop - datetime.timedelta(days=10))), minrad=3, maxdist=20)
load_simbad_auxcat(newsrc)
newsrc = newsrc.annotate(xct=Count("connections", filter=Q(connections__polymorphic_ctype__in=xraycats)))
hasxcounterparts = newsrc.filter(xct__gt=0)
hasxcounterparts.annotate(mcxflux=Max("connections__sxflux")).filter(sxflux__gt=F("mcxflux")*Value(10.)).update(transient=True)
GenericCatalog.objects.get(name="Erosita flux transients").genericsource_set.add(*hasxcounterparts)
hasecounterparts = newsrc.annotate(ect=Count("connections", filter=Q(connections__polymorphic_ctype=ContentType.objects.get_for_model(ErositaSource))))
    hasecounterpartstr = hasecounterparts.annotate(mceflux=Max("connections__sxflux", filter=Q(connections__polymorphic_ctype=ContentType.objects.get_for_model(ErositaSource)))).filter(ect__gt=0, sxflux__gt=F("mceflux")*Value(10.))
hasnoxcounterparts = newsrc.filter(xct=0).annotate(rassexp=Subquery(RosatExpMap.objects.filter(healpix=OuterRef("healpix")).values("exposure")[:1]))
GenericCatalog.objects.get(name="Erosita flux transients").genericsource_set.add(*hasecounterpartstr.filter(xct=0))
hasnoxcounterparts = hasnoxcounterparts.annotate(rasslim=Value(4.*5.76*0.89211e-12)*Power(Value(1.) + Power(F("rassexp")/Value(425), Value(4.)), Value(0.15))/F("rassexp"))
hasnoxcounterparts.filter(rassexp__gt=0, sxflux__gt=F("rasslim")*Value(10.)).update(rosat_transient=True)
GenericCatalog.objects.get(name="Erosita rosat expmap transient").genericsource_set.add(*hasnoxcounterparts.filter(rosat_transient=True))
load_ztf_auxcat(newsrc, maxdist=10, minrad=3)
return newsrc
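
# Hedged usage example (the path and OBSID are hypothetical):
# newsrc = load_erosita_daily_source("/data/erosita/daily/catalog_01234012350.pkl")
# The OBSID encodes the eroday range, here erodays 01234 to 01235.
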
def find_counterparts(srcquery, counterpatscat, neighsearch=1, **kwargs):
    """
    For all sources in srcquery, which is assumed to have a many-to-many
    relationship to the counterpart catalog, search for counterparts within
    max(minrad[=30], 3*sqrt(src.poserr**2 + counterpart.poserr**2)).

    Parameters
    ----------
    srcquery: django queryset - iterable providing access to the catalog under consideration
    counterpatscat: queryset of the catalog to search counterparts in
    neighsearch: healpix neighbour search depth around each source
    **kwargs: additional arguments passed to the makeposerrdist function (see doc within)
    """
scat = read_frame(srcquery)
uhealpix = np.unique(scat.healpix.values)
uhealpix = np.unique(np.concatenate([uhealpix, neighbours(uhealpix, NSIDE_SOURCES, order=ORDER).ravel()]))
ccat = read_frame(counterpatscat.filter(healpix__in=uhealpix))
if scat.index.size == 0 or ccat.index.size == 0:
return 0
ccat.error_radius.fillna(0., inplace=True)
dmat = correlate_utils.makeposerrdist(scat, ccat, **kwargs)
uidx, ucts = np.unique(dmat.row, return_counts=True)
    cs = np.empty(ucts.size + 1, int)
cs[0] = 0
cs[1:] = ucts.cumsum()
ce = np.lib.stride_tricks.as_strided(cs, (cs.size - 1, 2), cs.strides*2)
glist = []
for sid, cid, d in zip(scat.iloc[dmat.row].id.values, ccat.iloc[dmat.col].id.values, dmat.data):
if sid == cid:
continue
src1, src2 = srcquery.get(id=sid), counterpatscat.get(id=cid)
glist.append(GenericConnection(connected=src1, connectto=src2, separation=d))
glist.append(GenericConnection(connected=src2, connectto=src1, separation=d))
print("bulk create")
GenericConnection.objects.bulk_create(glist, ignore_conflicts=True)
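
# Hedged note on the cross-match contract used above: makeposerrdist returns
# a scipy sparse COO matrix whose .row indexes the source frame, .col the
# counterpart frame, and .data the separations; connections are then stored
# symmetrically, so either side of a match can be traversed. Illustrative call:
# find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SXPS2),
#                   minrad=3., maxdist=20.)
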
def find_closest_counterpart(srcquery, counterpatscat, neighsearch=1):
scat = read_frame(srcquery)
#uhealpix = np.unique(scat.healpix.values)
#uhealpix = np.unique(np.concatenate([uhealpix, neighbours(uhealpix, NSIDE_SOURCES, order=ORDER).ravel()]))
#ccat = read_frame(counterpatscat.filter(healpix__in=uhealpix))
ccat = read_frame(counterpatscat)
if scat.index.size == 0 or ccat.index.size == 0:
return 0
ccat.error_radius.fillna(0., inplace=True)
dmat = correlate_utils.makedistmat3d(scat, ccat, 1200)
sidx = np.argsort(dmat.data)
print(dmat.data.size)
urow, uidx = np.unique(dmat.row[sidx], return_index=True)
ucol = dmat.col[sidx][uidx]
glist = []
for sid, cid, d in zip(scat.iloc[urow].id.values, ccat.iloc[ucol].id.values, dmat.data[sidx][uidx]):
if sid == cid:
continue
src1, src2 = srcquery.get(id=sid), counterpatscat.get(id=cid)
glist.append(GenericConnection(connected=src1, connectto=src2, separation=d))
glist.append(GenericConnection(connected=src2, connectto=src1, separation=d))
GenericConnection.objects.bulk_create(glist, ignore_conflicts=True)
def load_simbad_auxcat(genericsources, **kwargs):
hp = HEALPix(nside=NSIDE_SOURCES, order=ORDER, frame=FK5())
srcs = read_frame(genericsources)
s1 = aSimbad()
s1.reset_votable_fields()
s1.add_votable_fields("z_value", "otype", "plx", "parallax", "plx_error", "plx_bibcode")
fails = []
simbadgenericcat = GenericCatalog.objects.get(name="Simbad")
print(srcs.index.size)
for i in range((srcs.index.size + 4095)//4096):
eloc = srcs.iloc[i*4096:min((i + 1)*4096, srcs.index.size)]
try:
simbadc = s1.query_region(SkyCoord(eloc.ra.values, eloc.dec.values, unit="deg", frame="fk5"), radius=kwargs.get("maxdist", 60.)*au.arcsec).to_pandas()
        except Exception:
            fails.append(eloc.id.values)
            continue
print(simbadc.index.size)
sc = SkyCoord(simbadc.RA.values + " " + simbadc.DEC.values, unit=("hourangle", "deg"), frame="fk5")
simbadc["ra"] = sc.fk5.ra.value
simbadc["dec"] = sc.fk5.dec.value
simbadc["healpix"] = hp.skycoord_to_healpix(sc)
#simbadc['name'] = simbadc.MAIN_ID.str.decode("ascii")
#print(simbadc.name)
#simbadc["name"] = simbadc.name.astype("str")
simbadc["name"] = simbadc.MAIN_ID.astype("str")
print(simbadc.name)
simbadc['main_id'] = simbadc.name
simbadc['obj_class'] = simbadc.OTYPE.str.decode("ascii")
simbadc['coo_bibcode'] = simbadc.COO_BIBCODE.str.decode("ascii")
simbadc['coo_wavelength'] = simbadc.COO_WAVELENGTH.str.decode("ascii")
simbadc.PLX_BIBCODE = simbadc.PLX_BIBCODE.fillna(simbadc.PLX_BIBCODE_2)
simbadc['plx_bib'] = simbadc.PLX_BIBCODE.str.decode("ascii")
simbadc.PLX_VALUE = simbadc.PLX_VALUE.fillna(simbadc.PLX_VALUE_2)
simbadc.PLX_ERROR = simbadc.PLX_ERROR.fillna(simbadc.PLX_ERROR_2)
print(simbadc)
print(simbadc.name)
#return simbadc
simbadc.drop_duplicates('name', inplace=True)
simbadc.rename(columns={"COO_ERR_MINA": "error_radius", "Z_VALUE": "z",
"PLX_VALUE":"plx", "PLX_ERROR": "plx_err"}, inplace=True)
simbadc.error_radius.fillna(0., inplace=True)
duplicates = SimbadSource.objects.filter(name__in=simbadc.name.values)
print(simbadc.index.size)
print(simbadc.name)
if duplicates.exists():
duplicatesnames = np.array(duplicates.values_list("name"))[:, 0]
mask = np.isin(simbadc.name.values, duplicatesnames, invert=True)
simbadc = simbadc[mask]
print(simbadc.index.size)
simbadc.drop(columns=["RA", "DEC", "RA_PREC", "DEC_PREC", "COO_ERR_MAJA",
"COO_ERR_ANGLE", "COO_QUAL", "MAIN_ID",
"OTYPE", "COO_WAVELENGTH", "COO_BIBCODE",
"PLX_BIBCODE", "PLX_ERROR_2", "PLX_QUAL",
"OTYPE", "PLX_VALUE_2", "PLX_PREC",
"PLX_BIBCODE_2"], inplace=True)
sobjlist = []
for i, sobj in simbadc.iterrows():
sobjlist.append(SimbadSource.objects.create(**sobj))
simbadgenericcat.genericsource_set.add(*sobjlist)
#ech = erosrcs.filter(id__in=eloc.id.values)
find_counterparts(genericsources, GenericSource.objects.filter(instance_of=SimbadSource), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
def load_allwise_auxcat(genericsources, **kwargs):
hp = HEALPix(nside=NSIDE_SOURCES, order=ORDER, frame=FK5())
srcs = read_frame(genericsources)
fails = []
allwisegenericcat = GenericCatalog.objects.get(name="AllWISE")
for i in range((srcs.index.size + 4095)//4096):
eloc = srcs.iloc[i*4096:min((i + 1)*4096, srcs.index.size)]
print(eloc.index.size)
#allwisec = Vizier.query_region(SkyCoord(eloc.ra.values, eloc.dec.values, unit="deg", frame="fk5"), radius=kwargs.get("maxdist", 10.)*au.arcsec, catalog="II/328").to_pandas()
try:
allwisec = Vizier.query_region(SkyCoord(eloc.ra.values, eloc.dec.values, unit="deg", frame="fk5"), radius=kwargs.get("maxdist", 10.)*au.arcsec, catalog="II/328")[0].to_pandas()
        except Exception:
            print("failed to load")
            fails.append(eloc.id.values)
            continue
print(allwisec.index.size)
allwisec.rename(columns={"RAJ2000":"ra", "DEJ2000":"dec", "W1mag":"w1mag", "W2mag":"w2mag","W3mag":"w3mag","W4mag":"w4mag",
"e_W1mag":"e_w1mag", "e_W2mag":"e_w2mag","e_W3mag":"e_w3mag","e_W4mag":"e_w4mag", "AllWISE": "name"}, inplace=True)
sc = SkyCoord(allwisec.ra.values, allwisec.dec.values, unit="deg", frame="fk5")
allwisec["lii"] = sc.galactic.l.value
allwisec["bii"] = sc.galactic.b.value
allwisec["name"] = allwisec.name.astype(str)
allwisec["healpix"] = hp.skycoord_to_healpix(sc)
allwisec.drop_duplicates('name', inplace=True)
allwisec["error_radius"] = np.zeros(allwisec.index.size)
duplicates = AllWISEgs.objects.filter(name__in=allwisec.name.values)
if duplicates.exists():
duplicatesnames = np.array(duplicates.values_list("name"))[:, 0]
mask = np.isin(allwisec.name.values, duplicatesnames, invert=True)
allwisec = allwisec[mask]
allwisec.drop(columns=allwisec.columns.difference([f.name for f in AllWISEgs._meta.get_fields()]), inplace=True)
sobjlist = []
for i, sobj in allwisec.iterrows():
sobjlist.append(AllWISEgs.objects.create(**sobj))
allwisegenericcat.genericsource_set.add(*sobjlist)
#ech = erosrcs.filter(id__in=eloc.id.values)
find_counterparts(genericsources, GenericSource.objects.filter(instance_of=AllWISEgs), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
def load_NVSS_auxcat(genericsources, **kwargs):
hp = HEALPix(nside=NSIDE_SOURCES, order=ORDER, frame=FK5())
srcs = read_frame(genericsources)
fails = []
nvssgenericcat = GenericCatalog.objects.get(name="NVSS")
for i in range((srcs.index.size + 4095)//4096):
eloc = srcs.iloc[i*4096:min((i + 1)*4096, srcs.index.size)]
print(eloc.index.size)
#nvssc = Vizier.query_region(SkyCoord(eloc.ra.values, eloc.dec.values, unit="deg", frame="fk5"), radius=kwargs.get("maxdist", 10.)*au.arcsec, catalog="II/328").to_pandas()
try:
nvssc = Vizier.query_region(SkyCoord(eloc.ra.values, eloc.dec.values, unit="deg", frame="fk5"), radius=kwargs.get("maxdist", 10.)*au.arcsec, catalog="VIII/65")[0].to_pandas()
        except Exception:
            print("failed to load")
            fails.append(eloc.id.values)
            continue
print(nvssc.index.size)
nvssc.rename(columns={"NVSS": "name", "S1.4":"s14", "e_S1.4":"s14e", "MajAxis":"maxjaxis", "MinAxis":"minaxis"}, inplace=True)
sc = SkyCoord(nvssc.RAJ2000.values, nvssc.DEJ2000.values, unit=("hourangle", "deg"), frame="fk5")
nvssc["ra"] = sc.ra.value
nvssc["dec"] = sc.dec.value
nvssc["lii"] = sc.galactic.l.value
nvssc["bii"] = sc.galactic.b.value
nvssc["name"] = nvssc.name.astype(str)
nvssc["healpix"] = hp.skycoord_to_healpix(sc)
nvssc.drop_duplicates('name', inplace=True)
nvssc["error_radius"] = np.sqrt(nvssc.e_RAJ2000.values**2.*np.cos(np.deg2rad(nvssc.dec.values))**2. + nvssc.e_DEJ2000.values**2.)
duplicates = NVSSgs.objects.filter(name__in=nvssc.name.values)
if duplicates.exists():
duplicatesnames = np.array(duplicates.values_list("name"))[:, 0]
mask = np.isin(nvssc.name.values, duplicatesnames, invert=True)
nvssc = nvssc[mask]
nvssc.drop(columns=nvssc.columns.difference([f.name for f in NVSSgs._meta.get_fields()]), inplace=True)
sobjlist = []
for i, sobj in nvssc.iterrows():
sobjlist.append(NVSSgs.objects.create(**sobj))
nvssgenericcat.genericsource_set.add(*sobjlist)
find_counterparts(genericsources, GenericSource.objects.filter(instance_of=NVSSgs), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
def load_ztf_auxcat(genericsources, **kwargs):
hp = HEALPix(nside=NSIDE_SOURCES, order=ORDER, frame=FK5())
hp_plates = HEALPix(nside=NSIDE_PLATES,
order=ORDER_PLATES, frame=FK5())
srcs = read_frame(genericsources)
uhealpix = np.unique(srcs.healpix.values)
uhealpix = np.unique(np.concatenate([uhealpix, neighbours(uhealpix, NSIDE_SOURCES, order=ORDER).ravel()]))
ztfurl = "https://mars.lco.global/"
params = {'format':'json', 'rb__gte':0.8,'nbad__lte':0,'fwhm__lte':5,'elong__lte':1.2}
ztfsrc = []
ztfloaded = ZTFOid.objects.filter(healpix__in=uhealpix)
for i, row in srcs.iterrows():
print("%d out of %d, %d loaded, ra & dec: %.5f %.5f" % (i, srcs.index.size, len(ztfsrc), row.ra, row.dec))
params["cone"] = "%.5f,%.5f,%.5f" % (row.ra, row.dec, kwargs.get("maxdist", 10)/3600.)
r = requests.get(url=ztfurl, params=params)
if not r.ok: continue
ztfsrc += r.json()["results"]
    def unpack_mars_dict(d):
        s = d["candidate"]
        s["lco_id"] = d["lco_id"]
        s["objectId"] = d["objectId"]
        return pandas.Series(s)

    # collect the alert candidates in a dataframe; do not shadow the
    # module-level pandas alias `pd` here
    lcs = pandas.DataFrame([unpack_mars_dict(z) for z in ztfsrc])
    lcs = lcs.drop_duplicates("candid")
    lcs.to_pickle("/srg/a1/work/andrey/rass/tmp_ztf_dumpy.pkl")
    sc = SkyCoord(lcs.ra.values, lcs.dec.values, unit="deg", frame="fk5")
    lcs["healpix"] = hp.skycoord_to_healpix(sc)
    lcs["healpix_plate"] = hp_plates.skycoord_to_healpix(sc)
    format_string = '%a, %d %b %Y %H:%M:%S %Z'
    lcs["wt"] = [datetime.datetime.strptime(t, format_string) for t in lcs.wall_time.values]
    lcs.drop(columns=["wall_time"], inplace=True)
    lcs.rename(columns={"wt": "wall_time"}, inplace=True)
    lcs.drop(columns=lcs.columns.difference(["objectId",] + [field.name for field in ZTFlc._meta.get_fields()]), inplace=True)
    pdgrp = lcs.groupby("objectId")
ztfurl = "https://ztf.alerce.online/query"
ztfcat = GenericCatalog.objects.get(name="ZTF")
ztffieldnames = [field.name for field in ZTFOid._meta.get_fields()]
for name, group in pdgrp:
try:
ztfobj = ZTFOid.objects.get(objectId=name)
except ObjectDoesNotExist:
            r = requests.post(ztfurl, json={"total": 1, "query_parameters": {"filters": {"oid": name}}})
            payload = r.json() if r.ok else {}
            if payload.get("result") and name in payload["result"]:
                ztfobj = payload["result"][name]
ztfobj["objectId"] = ztfobj["oid"]
ztfobj["ra"] = ztfobj["meanra"]
ztfobj["dec"] = ztfobj["meandec"]
sc = SkyCoord(ztfobj["meanra"], ztfobj["meandec"], unit="deg", frame="fk5")
ztfobj["healpix"] = hp.skycoord_to_healpix(sc)
ztfobj["healpix_plate"] = hp_plates.skycoord_to_healpix(sc)
ztfobj["name"] = ztfobj["objectId"]
print(ztfobj)
ztfobj = {k:v for k, v in ztfobj.items() if k in ztffieldnames}
if ztfobj["classrf"] is None:
ztfobj["classrf"] = "Not specified"
ztfobj["pclassrf"] = 1.
print(ztfobj)
ztfobj = ZTFOid.objects.create(**ztfobj)
ztfcat.genericsource_set.add(ztfobj)
else:
continue
clist = np.array([l[0] for l in ztfobj.ztflc_set.values_list("candid")])
for i, row in group[~group.candid.isin(clist)].drop(columns=["objectId",]).iterrows():
ZTFlc.objects.create(ztfobject=ztfobj, **row)
find_counterparts(genericsources, GenericSource.objects.filter(instance_of=ZTFOid), maxdist=kwargs.get("maxdist", 10.), minrad=kwargs.get("minrad", 3.))
ztfclasses={
None: "Not specified",
'null':"Not specified",
"classified":"Classified",
"not classified":"Not classified",
1:"Ceph",
2:"DSCT", #Delta SCuTi, a class of pulsating variable stars named after Delta Scuti, the archetype for the class
3:"EB", # Eclipsing Beta Lyrae, a class of eclipsing binary stars named after Beta Lyrae, the archetype for the class
4:"LPV", #Long Period Variable, a type of variable star that changes in brightness slowly over time
5:"RRL", #RR Lyrae, a class of pulsating variable stars named after RR Lyrae, the archetype of the class
6:"SN",
0:"Other",
7:"AGN I",
8:"Blazar",
9:"CV/Nova",
10:"SN Ia",
11:"SN Ibc",
12:"SN II",
14:"SLSN", #A super-luminous supernova (SLSN, plural super luminous supernovae or SLSNe) is a type of stellar explosion with a luminosity 10 or more times higher than that of standard supernovae
15:"EB/SD/D", # SD - subdwarf, stars fainter than main-sequence stars with the same colors; often used as a prefix to a star's spectral type
16:"EB/C",
17:"Periodic/Other",
18:"AGN",
19:"SN",
20:"Variable Star",
21:"Asteroid",
22:"Bogus",
23:"RS-CVn",
24:"QSO-I"}