# Snapshot metadata (paste artifact, kept as comments so the file parses):
# 2024-04-26 12:43:00 +03:00
# 222 lines
# 7.5 KiB
# Python

from django.core.management.base import BaseCommand, CommandError
import os
import io
import gzip
import tarfile
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
import fastavro
from astropy.time import Time
from astropy.io import fits
import astropy.units as u
import aplpy
from collections import defaultdict
import requests
import json
import pprint
"""
https://zwickytransientfacility.github.io/ztf-avro-alert/filtering.html
Based on tests done at IPAC (F. Masci, priv. comm), the following filter delivers a relatively pure sample:
rb >= 0.65 and
nbad = 0 and
fwhm <= 5 and
elong <= 1.2 and
abs(magdiff) <= 0.1
"""
def plot_lightcurve(dflc, ax=None, days_ago=True):
    """Plot a ZTF alert light curve, one color per filter.

    Detections are drawn as error bars (magpsf +/- sigmapsf); nondetections
    are drawn as faint downward triangles at the limiting magnitude
    (diffmaglim).

    Parameters
    ----------
    dflc : pandas.DataFrame
        Light-curve frame as produced by make_dataframe; the objectId
        metadata attribute is used for the title.
    ax : matplotlib axes, optional
        Axes to draw into. A new figure/axes is created when None.
        (Bug fix: the original accepted ``ax`` but always drew on the
        current axes via ``plt.*``; it is now used explicitly.)
    days_ago : bool
        When True (default), plot time as days relative to now; otherwise
        plot raw Julian dates.
    """
    filter_color = {1: 'green', 2: 'red', 3: 'pink'}
    if days_ago:
        now = Time.now().jd
        t = dflc.jd - now
        xlabel = 'Days Ago'
    else:
        t = dflc.jd
        xlabel = 'Time (JD)'
    if ax is None:
        plt.figure()
        ax = plt.gca()
    for fid, color in filter_color.items():
        # plot detections in this filter:
        w = (dflc.fid == fid) & ~dflc.magpsf.isnull()
        if np.sum(w):
            ax.errorbar(t[w], dflc.loc[w, 'magpsf'], dflc.loc[w, 'sigmapsf'],
                        fmt='.', color=color)
        # upper limits (rows with no magpsf) in this filter:
        wnodet = (dflc.fid == fid) & dflc.magpsf.isnull()
        if np.sum(wnodet):
            ax.scatter(t[wnodet], dflc.loc[wnodet, 'diffmaglim'],
                       marker='v', color=color, alpha=0.25)
    # Magnitudes: smaller is brighter, so flip the y axis.
    ax.invert_yaxis()
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Magnitude')
    ax.set_title(dflc.objectId)
def plot_cutout(stamp, fig=None, subplot=None, **kwargs):
    """Render one gzip-compressed FITS cutout stamp via aplpy.

    Parameters
    ----------
    stamp : bytes
        Gzip-compressed FITS image bytes (an alert cutout's 'stampData').
    fig : matplotlib figure, optional
        Figure to draw into; a new 4x4-inch figure is created when None.
    subplot : tuple, optional
        (nrows, ncols, index) placement; defaults to (1, 1, 1).
    **kwargs
        Passed through to aplpy.FITSFigure.

    Returns
    -------
    aplpy.FITSFigure
        The figure showing the stamp in arcsinh-stretched grayscale.
    """
    # The stamp is a gzip stream wrapping a FITS file: decompress the whole
    # stream first, then hand the raw FITS bytes to astropy.
    with gzip.open(io.BytesIO(stamp), 'rb') as f:
        with fits.open(io.BytesIO(f.read())) as hdul:
            if fig is None:
                fig = plt.figure(figsize=(4,4))
            if subplot is None:
                subplot = (1,1,1)
            ffig = aplpy.FITSFigure(hdul[0],figure=fig, subplot=subplot, **kwargs)
            ffig.show_grayscale(stretch='arcsinh')
    return ffig
def show_stamps(packet):
    """Display an alert's Science, Template and Difference cutouts side by side."""
    #fig, axes = plt.subplots(1,3, figsize=(12,4))
    fig = plt.figure(figsize=(12,4))
    for position, name in enumerate(('Science', 'Template', 'Difference'), start=1):
        stamp_bytes = packet['cutout{}'.format(name)]['stampData']
        panel = plot_cutout(stamp_bytes, fig=fig, subplot=(1, 3, position))
        panel.set_title(name)
def show_all(packet):
    """Plot an alert's light curve alongside its three cutout stamps in one figure."""
    fig = plt.figure(figsize=(16,4))
    lightcurve = make_dataframe(packet)
    # Light curve in panel 1; the three stamps fill panels 2-4.
    plot_lightcurve(lightcurve, ax=plt.subplot(1,4,1))
    for position, name in enumerate(('Science', 'Template', 'Difference'), start=2):
        stamp_bytes = packet['cutout{}'.format(name)]['stampData']
        panel = plot_cutout(stamp_bytes, fig=fig, subplot=(1, 4, position))
        panel.set_title(name)
def make_dataframe(packet):
    """Build a single light-curve DataFrame from an alert packet.

    The current candidate becomes row 0, followed by all previous
    candidates. The packet's objectId and candid are attached as DataFrame
    attributes — note this metadata may not be preserved after further
    pandas operations.
    https://stackoverflow.com/questions/14688306/adding-meta-information-metadata-to-pandas-dataframe
    """
    current = pd.DataFrame(packet['candidate'], index=[0])
    history = pd.DataFrame(packet['prv_candidates'])
    lightcurve = pd.concat([current, history], ignore_index=True, sort=False)
    lightcurve.objectId = packet['objectId']
    lightcurve.candid = packet['candid']
    return lightcurve
def is_alert_pure(packet):
    """Apply the IPAC purity cuts to one alert packet (see module notes).

    Returns a truthy value only when the candidate passes every cut:
    rb >= 0.65, nbad == 0, fwhm <= 5, elong <= 1.2, |magdiff| <= 0.1.
    All cuts are evaluated eagerly (no short-circuiting), so a missing
    candidate key always raises KeyError.
    """
    cand = packet['candidate']
    cuts = (
        cand['rb'] >= 0.65,
        cand['nbad'] == 0,
        cand['fwhm'] <= 5,
        cand['elong'] <= 1.2,
        np.abs(cand['magdiff']) <= 0.1,
    )
    pure = True
    for passed in cuts:
        pure &= passed
    return pure
def generate_dictionaries(root_dir):
    """Yield every alert packet from every .avro file found under root_dir."""
    for avro_path in find_files(root_dir):
        yield from open_avro(avro_path)
def open_avro(fname):
    """Yield each alert packet stored in a single avro file."""
    with open(fname, 'rb') as handle:
        # in principle there can be multiple packets per file
        for record in fastavro.reader(handle):
            yield record
def find_files(root_dir):
    """Yield the path of every .avro file under root_dir.

    Walks the tree recursively, following symlinks. Paths are built with
    os.path.join (the original hard-coded '/' separators, which is not
    portable).
    """
    for dir_name, _subdirs, file_list in os.walk(root_dir, followlinks=True):
        for fname in file_list:
            if fname.endswith('.avro'):
                yield os.path.join(dir_name, fname)
def is_transient(dflc):
    """Heuristic filter for plausible non-moving transients.

    Operates on the light-curve DataFrame from make_dataframe and returns a
    truthy value only when all four criteria hold for the newest candidate
    (row 0).
    """
    candidate = dflc.loc[0]
    # 1) Positive difference image (source brighter than the reference).
    is_positive_sub = candidate['isdiffpos'] == 't'
    # 2) Nearest PS1 source is absent, farther than 1.5", or has a
    #    star-galaxy score below 0.5 (i.e. likely not a point source).
    nearest_ps1 = candidate['distpsnr1']
    if (nearest_ps1 is None) or (nearest_ps1 > 1.5):
        no_pointsource_counterpart = True
    elif candidate['sgscore1'] < 0.5:
        no_pointsource_counterpart = True
    else:
        no_pointsource_counterpart = False
    # 3) At least two detections whose largest consecutive gap is >= ~30
    #    minutes — a moving object would not be re-detected at the same
    #    position that far apart.
    where_detected = (dflc['isdiffpos'] == 't')  # nondetections will be None
    not_moving = False
    if np.sum(where_detected) >= 2:
        detection_times = dflc.loc[where_detected, 'jd'].values
        gaps = np.diff(detection_times)
        not_moving = np.max(gaps) >= (30 * u.minute).to(u.day).value
    # 4) No known solar-system object: no match, sentinel negative distance,
    #    or match farther than 5".
    sso_dist = candidate['ssdistnr']
    no_ssobject = (sso_dist is None) or (sso_dist < 0) or (sso_dist > 5)
    return is_positive_sub and no_pointsource_counterpart and not_moving and no_ssobject
class Command(BaseCommand):
    """Query the MARS ZTF alert broker and print alerts from the last 24 hours.

    Everything after the early ``return`` in ``handle`` is unreachable
    exploratory code for processing a local ZTF avro tarball; it is kept
    for reference and clearly marked below.
    """

    help = 'Initiates database'  # fixed typo: was 'Initiates data dase'

    def handle(self, *args, **options):
        # Full MARS query URL kept for reference (unused); the request below
        # sends only the parameters that matter.
        query_86400='https://mars.lco.global/?format=json&sort_value=jd&sort_order=desc&objectId=&candid=&time__gt=&time__lt=&time__since=86400&jd__gt=&jd__lt=&filter=&cone=&objectcone=&objectidps=&ra__gt=&ra__lt=&dec__gt=&dec__lt=&l__gt=&l__lt=&b__gt=&b__lt=&magpsf__gte=&magpsf__lte=&sigmapsf__lte=&magap__gte=&magap__lte=&distnr__gte=&distnr__lte=&deltamaglatest__gte=&deltamaglatest__lte=&deltamagref__gte=&deltamagref__lte=&elong__lte=&nbad__lte=&rb__gte=&drb__gte=&classtar__gte=&classtar__lte=&fwhm__lte='
        URL = "https://mars.lco.global/"
        # defining a params dict for the parameters to be sent to the API:
        # JSON output, alerts from the last 86400 seconds (24 hours).
        #data = {"queries": [{"time_since": 100,},]}
        PARAMS = {'format': 'json', 'time__since': 86400}
        # sending get request and saving the response as response object;
        # a timeout keeps a hung broker from hanging the whole command.
        r = requests.get(url=URL, params=PARAMS, timeout=30)
        # extracting data in json format
        data = r.json()
        # Print the top-level keys of the response payload.
        for d in data:
            print(d)
        #pprint.pprint(data['results'])
        for item in data['results']:
            print(item['lco_id'], item['objectId'], item['candidate']['wall_time'])
        return
        # ---- Unreachable below this point (kept for reference) ----
        tar_archive = '/export/django/srg/data/ZTF/ztf_public_20200125.tar.gz'
        # basename without the '.tar.gz' suffix, e.g. 'ztf_public_20200125'
        output_dir = tar_archive.split('/')[-1].split('.')[-3]
        print(output_dir)
        print('{} has {} avro files'.format(output_dir, len(list(find_files(output_dir)))))
        #archive = tarfile.open(tar_archive,'r:gz')
        #archive.extractall(path=output_dir)
        #archive.close()
        programs = defaultdict(int)
        #for packet in generate_dictionaries(output_dir):
        #    programs[packet['candidate']['programid']] += 1
        #print(programs)
        # Count pure alerts per ZTF program id.
        for packet in filter(is_alert_pure, generate_dictionaries(output_dir)):
            programs[packet['candidate']['programid']] += 1
        print(programs)
        # Collect alerts that also pass the transient heuristics.
        transient_alerts = []
        for packet in filter(is_alert_pure, generate_dictionaries(output_dir)):
            dflc = make_dataframe(packet)
            if is_transient(dflc):
                print(packet['objectId'], packet['candidate']['ra'], packet['candidate']['dec'])
                transient_alerts.append(packet)
        """
        for packet in transient_alerts:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                show_all(packet)
        """