# srg/srglib/load_transinets.py
import pandas
import glob
import os
from srglib import correlate_utils
from srglib import genericsourceutils as gsu
from astropy.coordinates import SkyCoord
from django_pandas.io import read_frame
import numpy as np
from astropy.time import Time, TimeDelta
import datetime
from math import sin, cos, sqrt, pi
from ErositaDailySourceCatalogues.models import ErositaSource
import pytz
import subprocess
from genericsource.models import *
from ErositaDailySourceCatalogues import views as eviews
from ErositaDailySourceCatalogues.models import *
from auxcatalogues.models import SimbadSource, RXS2, SXPS2, CSC2, \
    XMM3DR8, ZTFSource, SDSSSpec, GAIASource, ZTFOid, ZTFlc, AllWISEgs, \
    NVSSgs
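
# Survey scan-geometry unit vectors; currently referenced only by the
# commented-out survey-phase computation inside update_columns() below.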
northpole = np.array([-4.26367573e-16, -3.97777165e-01, 9.17482058e-01])
surveystart = np.array([-7.43149503e-06, -4.00344939e-04, -1.73318355e-04])
pangle = np.cross(northpole, surveystart)
pangle = pangle/np.sqrt(np.sum(pangle**2.))
SurveyCatName = {1: "1st survey transients",
                 2: "2nd survey transients",
                 3: "3rd survey transients"}

def add_aux_data(srcid, survey, coord=None, newsourceid=None):
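    """
    Collect auxiliary product files (currently only the detection image) for
    transient ``srcid`` of survey ``survey`` and, when ``newsourceid`` is
    given, copy them into /data/products/erotrans/<newsourceid>.  Returns a
    dict mapping product name to file name, with the empty string for missing
    products.  ``coord`` is accepted but currently unused.
    """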
    #print("/data/erosita/transients/products/trans_%d_e%d-1.png" % (srcid, survey))
    phimg = glob.glob("/data/erosita/transients/products/trans_%d_e%d.png" % (srcid, survey))
    res = {k: "" for k in ["spec", "specimg", "arf", "rmf", "bkg", "reg", "log", "lcfits", "lcpdf"]}
    if phimg:
        res["img"] = os.path.basename(phimg[0])
    cwd = os.getcwd()
    os.chdir("/data/erosita/transients/products")
    if newsourceid:
        dpath = "/data/products/erotrans/%d" % newsourceid
        ppath = "/data/erosita/transients/products/"
        if not os.path.exists(dpath):
            os.mkdir(dpath)
        if "img" in res:
            subprocess.run(["cp", os.path.join(ppath, res["img"]), os.path.join(dpath, res["img"])])
    os.chdir(cwd)  # restore the working directory saved above
    return res

def get_closest_unique_withinapp(ecat, ncat, apps):
"""
find all closest unique counterparts within definde distance
"""
urow = np.empty(0, np.int)
ucol = np.empty(0, np.int)
if ecat.index.size:
maske = np.ones(ecat.index.size, np.bool)
maskn = np.ones(ncat.index.size, np.bool)
eidx = np.arange(ecat.index.size)
nidx = np.arange(ncat.index.size)
for i in range(21):
if not (np.any(maske) and np.any(maskn)):
break
row, col = correlate_utils.get_closest_unique(ecat[maske], ncat[maskn], apps)
print(row.size)
if row.size == 0 or np.all(~maske) or np.all(~maskn):
break
urow = np.concatenate([urow, eidx[maske][row]])
ucol = np.concatenate([ucol, nidx[maskn][col]])
maske[urow] = False
maskn[ucol] = False
if False: #urow.size:
mask = np.array([abs((ecat.iloc[i].obsdate - ncat.iloc[j].obsdate).days) for i, j in zip(urow, ucol)]) < 3
urow, ucol = urow[mask], ucol[mask]
return urow, ucol
def load_nondetections(ecat, ncat, catsrc, apps, survey):
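    """
    Match the non-detection table ``ncat`` against the existing source table
    ``ecat`` (queryset ``catsrc``) within the ``apps`` matching radius.
    Matched sources are updated in place and their spectral products cleared;
    unmatched rows are created as new ErositaSource records.  Returns the
    list of newly created sources.
    """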
    urow, ucol = get_closest_unique_withinapp(ecat, ncat, apps)
    print(urow.size, ncat.index.size, ecat.index.size)
    ncatidd = ncat.drop(columns=["id",])
    for i, j in zip(urow, ucol):
        src = catsrc.filter(id__in=[ecat.iloc[i].id,])
        src.update(**ncatidd.iloc[j])
        src = src.first()
        sc = SkyCoord(src.ra, src.dec, unit="deg", frame="fk5")
        try:
            auxdata = add_aux_data(ncat.iloc[j].id, survey, sc, src.id)
        except Exception:
            print("warn: no spectrum info for %d found" % ncat.iloc[j].id)
        else:
            #print("auxdata", auxdata)
            try:
                adata = src.srcauxdata
            except SrcAuxDATA.DoesNotExist:
                adata = SrcAuxDATA.objects.create(src=src)
            #print("update source", auxdata)
            for key, val in auxdata.items():
                setattr(adata, key, val)
                #print(key, val, getattr(adata, key))
            adata.save()
            # a non-detection carries no spectral products: clear them
            for key in ["spec", "specimg", "arf", "rmf", "bkg", "reg", "log", "lcfits", "lcpdf"]:
                setattr(adata, key, "")
            adata.save()
    newsrc = []
    if ucol.size < ncat.index.size:
        idx = np.arange(ncat.index.size)
        idx = idx[np.isin(idx, ucol, invert=True, assume_unique=True)]
        print(idx.size)
        for i in idx:
            #print(ncatidd.iloc[i])
            src = ErositaSource.objects.create(**ncatidd.iloc[i], srcstatus="new")
            sc = SkyCoord(src.ra, src.dec, unit="deg", frame="fk5")
            print("processing", ncat.iloc[i].id, src.id)
            try:
                auxdata = add_aux_data(ncat.iloc[i].id, survey, sc, src.id)
            except Exception:
                print("warn: no spectrum info found")
                auxdata = {}  # fall back to an empty aux record
            #print('create new source', auxdata)
            SrcAuxDATA.objects.create(src=src, **auxdata)
            newsrc.append(src)
    #catsrc.genericsource_set.add(*newsrc)
    return newsrc

def update_columns(mtable, survey):
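    """
    Derive the ErositaSource columns (galactic coordinates, healpix indices,
    name, flux upper limit and observation times) for survey ``survey`` from
    the master-table rows, and drop every column that has no counterpart in
    the ErositaSource model.
    """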
    sc = SkyCoord(mtable.ra.values, mtable.dec.values, unit="deg", frame="fk5")
    mtable["lii"] = sc.galactic.l.value
    mtable["bii"] = sc.galactic.b.value
    mtable["healpix"] = gsu.hp.skycoord_to_healpix(sc)
    mtable["healpix_plate"] = gsu.hp_plates.skycoord_to_healpix(sc)
    mtable["detected"] = False
    mtable["name"] = [gsu.make_source_name("SRGe", r, d) for r, d in zip(mtable.ra.values, mtable.dec.values)]
    mtable["sxflux"] = mtable["UPLIM_e%d" % survey]
    mtable["id"] = mtable.index.values
    """
    vecs = np.empty((mtable.index.size, 3), np.double)
    vecs[:, 0] = np.cos(mtable.dec.values*pi/180.)*np.cos(mtable.ra.values*pi/180.)
    vecs[:, 1] = np.cos(mtable.dec.values*pi/180.)*np.sin(mtable.ra.values*pi/180.)
    vecs[:, 2] = np.sin(mtable.dec.values*pi/180.)
    vecsp = vecs - northpole*np.sum(northpole*vecs, axis=1)[:, np.newaxis]
    vecsp = vecsp/np.sqrt(np.sum(vecsp**2, axis=1))[:, np.newaxis]
    alpha = np.arctan2(np.sum(vecsp*pangle, axis=1), np.sum(vecsp*surveystart, axis=1))
    alpha[alpha < 0] = pi + alpha[alpha < 0.]
    """
    tstart = mtable["TSTART_e%d" % survey].values
    tstop = mtable["TSTOP_e%d" % survey].values
    """
    mtable["obsdate"] = (Time(51543.87, format="mjd") + TimeDelta((tstart + tstop)/2., format="sec")).to_datetime(timezone=pytz.UTC)
    mtable["tstart"] = (Time(51543.87, format="mjd") + TimeDelta(tstart, format="sec")).to_datetime(timezone=pytz.UTC)
    mtable["tstop"] = (Time(51543.87, format="mjd") + TimeDelta(tstop, format="sec")).to_datetime(timezone=pytz.UTC)
    """
    mtable["tstart"] = mtable["TSTART_e%d" % survey]
    mtable["tstop"] = mtable["TSTOP_e%d" % survey]
    mtable["obsdate"] = mtable["TSTART_e%d" % survey] + (mtable["TSTOP_e%d" % survey] - mtable["TSTART_e%d" % survey])/2.
    mtable.drop(columns=mtable.columns.difference([f.name for f in ErositaSource._meta.get_fields()] + ["id",]), inplace=True)
    print(survey, mtable.index.size)
    return mtable

def read_master_table(mastertablename, **kwargs):
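    """
    Load survey non-detections from the master-table pickle, register them in
    the per-survey transient catalogues and attach counterparts from the
    auxiliary catalogues to the newly created sources.
    """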
    mtable = pandas.read_pickle(mastertablename)
    newsrc = []
    for survey in [2,]: #np.arange(1, 4):
        mel = mtable[mtable["CTS_e%d" % survey].isna() & ~mtable["TSTART_e%d" % survey].isna()].copy()
        mel = update_columns(mel, survey)
        ccat = ErositaSource.objects.filter(id__in=GenericCatalog.objects.get(name=SurveyCatName[survey]).genericsource_set.all())
        ecat = read_frame(ccat)
        nset = load_nondetections(ecat, mel, ccat, 30, survey)
        GenericCatalog.objects.get(name=SurveyCatName[survey]).genericsource_set.add(*nset)
        newsrc += nset
    if len(newsrc) > 0:
        newsrc = GenericSource.objects.filter(id__in=[src.id for src in newsrc])
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=CSC2), minrad=3., maxdist=20.)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SXPS2), minrad=3., maxdist=20.)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=XMM3DR8), minrad=3., maxdist=20.)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=RXS2), minrad=20., maxdist=80.)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SDSSSpec), minrad=3., maxdist=20.)
        gsu.find_from_old_gaia_db(newsrc, minrad=5, maxdist=20)
        #gsu.load_allwise_auxcat(newsrc, maxdist=10, minrad=3)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=AllWISEgs), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
        #gsu.load_NVSS_auxcat(newsrc, maxdist=10, minrad=3)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=NVSSgs), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
        #find_counterparts(newsrc, GenericSource.objects.filter(Q(instance_of=ErositaSource) & Q(erositasource__obsdate__lt=cat.stop - datetime.timedelta(days=10))), minrad=3, maxdist=20)
        #gsu.load_simbad_auxcat(newsrc)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SimbadSource), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
        #gsu.load_ztf_auxcat(newsrc, maxdist=10, minrad=3)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=ZTFOid), maxdist=kwargs.get("maxdist", 10.), minrad=kwargs.get("minrad", 3.))

def set_transient_data(ecat):
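    """
    Annotate the given ErositaSource queryset with counterpart fields, keep
    the sources with ratio_to_maxxflux > 1 and create an ErositaTransient
    record for each of them.
    """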
    ecat = eviews.create_annotations(ecat)
    ecat = eviews.addErositaSrcannotattion(ecat)
    ecat = ecat.filter(ratio_to_maxxflux__gt=1.)
    print(ecat.count())
    for src in ecat:
        try:
            ErositaTransient.objects.create(src=src, plx=src.plx, plxe=src.plxe,
                                            z=src.ssz if src.ssz else src.sdssz,
                                            ssep=src.ssep,
                                            sname=src.sname if src.sname else "",
                                            ztfcl=src.ztfcl if src.ztfcl else "",
                                            ztfnm=src.ztfnm if src.ztfnm else "",
                                            zsep=src.zsep, plx_sep=src.plx_sep,
                                            plx_mag=src.plx_mag,
                                            sclass=src.sclass if src.sclass else "",
                                            ratio_to_maxxflux=src.ratio_to_maxxflux,
                                            ratio_to_minxflux=src.ratio_to_minxflux)
        except Exception:
            print(src.name, "already has a transient")
            raise
        else:
            print("transient for %s created" % src.name)

def read_master_table_and_surveys(mastertablename, surveynames, **kwargs):
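    """
    Load the detected sources of surveys 1-3 from the ``surveynames`` packs
    and the non-detections from the master-table pickle, cross-match all
    newly loaded sources against the auxiliary catalogues and register the
    resulting transients in the 'transients 2' catalogue.
    """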
    mtable = pandas.read_pickle(mastertablename)
    mtable.rename(columns={"RA": "ra", "DEC": "dec"}, inplace=True)
    # the newest source before this load (assumes the table is non-empty)
    elast = ErositaSource.objects.last() #order_by("created").last()
    newsrc = []
    for survey in np.arange(1, 4):
        detectedsources, ftime = gsu.read_esrc_pack(surveynames % survey)
        print("load survey", survey, surveynames % survey)
        detectedsources["survey"] = survey
        # keep only the closest unique detection for each master-table row
        dmat = correlate_utils.makedistmat3d(detectedsources, mtable, 40.)
        urow, ucol = correlate_utils.select_closest_pairs(dmat.data, dmat.row, dmat.col)
        detectedsources = detectedsources.iloc[urow].set_index(mtable.index.values[ucol])
        ns = gsu.load_erosita_trans_daily_cat(detectedsources, ftime, 30, load_cat=SurveyCatName[survey])
        newsrc += list(ns)
        mel = mtable[mtable["CTS_e%d" % survey].isna() & ~mtable["TSTART_e%d" % survey].isna()].copy()
        mel = update_columns(mel, survey)
        ccat = ErositaSource.objects.filter(id__in=GenericCatalog.objects.get(name=SurveyCatName[survey]).genericsource_set.all())
        ecat = read_frame(ccat)
        nset = load_nondetections(ecat, mel, ccat, 30, survey)
        GenericCatalog.objects.get(name=SurveyCatName[survey]).genericsource_set.add(*nset)
        newsrc += nset
    allloaded = ErositaSource.objects.filter(created__gt=elast.created)
    if len(newsrc) > 0:
        newsrc = GenericSource.objects.filter(id__in=[src.id for src in newsrc])
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=CSC2), minrad=3., maxdist=20.)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SXPS2), minrad=3., maxdist=20.)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=XMM3DR8), minrad=3., maxdist=20.)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=RXS2), minrad=20., maxdist=80.)
        gsu.find_counterparts(newsrc, GenericSource.objects.filter(instance_of=SDSSSpec), minrad=3., maxdist=20.)
        gsu.find_from_old_gaia_db(newsrc, minrad=5, maxdist=20)
        gsu.load_allwise_auxcat(newsrc, maxdist=10, minrad=3)
        gsu.find_counterparts(allloaded, GenericSource.objects.filter(instance_of=AllWISEgs), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
        gsu.load_NVSS_auxcat(newsrc, maxdist=10, minrad=3)
        gsu.find_counterparts(allloaded, GenericSource.objects.filter(instance_of=NVSSgs), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
        gsu.load_simbad_auxcat(newsrc)
        gsu.find_counterparts(allloaded, GenericSource.objects.filter(instance_of=SimbadSource), maxdist=kwargs.get("maxdist", 20.), minrad=kwargs.get("minrad", 3.))
        gsu.load_ztf_auxcat(newsrc, maxdist=10, minrad=3)
        gsu.find_counterparts(allloaded, GenericSource.objects.filter(instance_of=ZTFOid), maxdist=kwargs.get("maxdist", 10.), minrad=kwargs.get("minrad", 3.))
        set_transient_data(allloaded)
        GenericCatalog.objects.get(name='transients 2').genericsource_set.add(*allloaded.exclude(erositatransient=None))