srg/heasarc/tdat.py

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""HEASARC TDAT simple reader.

tDat.py:
  HEASARC TDAT read functionality for simple character
  delimited files with various options for column header definition.

:Copyright: Space Research Institute (IKI), Moscow, Russia (2019)
:Author: Roman Krivonos (krivonos@cosmos.ru)
"""


import re
import sys

import astropy
from astropy.io import ascii
from collections import OrderedDict

class tDatHeader(ascii.BaseHeader):
    """
    TDAT Header Reader

    Parses the header section of a HEASARC TDAT file, i.e. the lines found
    between the ``<HEADER>`` and ``<DATA>`` markers.  Two kinds of header
    lines are recognised:

    * ``field[name] = type // description`` -- a column definition;
    * ``keyword = value`` -- a table-level keyword.

    Lines matching ``comment`` are ignored.
    """
    comment = r'^\s*#.*'  # Lines whose first non-blank character is "#" are comments

    # Column definition, e.g. 'field[name] = char20 // Source name'.
    # Groups: (1) column name, (2) type/format specification, (3) description.
    _re_field_def = re.compile(r'^field\[([\w0-9\_]+)\]\s*=\s*([\s*\w*\/\:\_\-\.\[\]\;\(\)\^]+)\s*\/\/ (.*)$')
    # Table-level keyword, e.g. 'table_name = my_table'.
    # Groups: (1) keyword, (2) value.
    _re_keyword_def = re.compile(r'^([\w0-9\_]+)\s*=\s*(.*)$')

    def __init__(self):
        ascii.BaseHeader.__init__(self)

    def process_lines(self, lines):
        """
        Generator yielding the non-comment lines of the header section.

        Parameters
        ----------
        lines : iterable of str
            All lines of the input table.

        Yields
        ------
        str
            Each line located after ``<HEADER>`` and before ``<DATA>``,
            unmodified.  Iteration stops at the first ``<DATA>`` marker.
        """
        re_comment = re.compile(self.comment)
        in_header = False
        for line in lines:
            if re_comment.match(line):
                continue
            # Compare markers with surrounding blanks removed so that stray
            # trailing whitespace does not hide the header section.
            marker = line.strip()
            if marker == "<DATA>":
                return
            if in_header:
                yield line
            elif marker == "<HEADER>":
                in_header = True

    def get_cols(self, lines):
        """
        Initialise ``self.names`` and ``self.cols`` from the ``field[...]``
        definitions found in the header section.

        Parameters
        ----------
        lines : iterable of str
            All lines of the input table.
        """
        field_matches = map(self._re_field_def.search, self.process_lines(lines))
        # Header lines that are not column definitions produce no match and
        # are skipped here; update_meta() handles them as keywords.
        self.names = [match.group(1) for match in field_matches if match]

        self._set_cols_from_names()

    def update_meta(self, lines, meta):
        """
        Extract table-level keywords and column definitions for tDat table.

        Parameters
        ----------
        lines : iterable of str
            All lines of the input table.
        meta : dict
            Reader metadata; ``meta['table']`` must already exist.  It is
            filled in place with three entries:

            * ``'comments'`` -- an empty list;
            * ``'keywords'`` -- ``OrderedDict`` mapping keyword -> value;
            * ``'cols'`` -- ``OrderedDict`` mapping column name ->
              ``{'type': ..., 'description': ...}``.
        """
        table_meta = meta['table']
        table_meta['comments'] = []
        table_meta['keywords'] = OrderedDict()
        table_meta['cols'] = OrderedDict()

        keywords = table_meta['keywords']
        cols = table_meta['cols']

        for line in self.process_lines(lines):
            field = self._re_field_def.search(line)
            if field:
                name, field_type, description = (
                    group.strip() for group in field.groups()
                )
                cols[name] = {"type": field_type, "description": description}
                continue
            # Field definitions are tested first: anything else of the form
            # ``keyword = value`` is stored as a table keyword.
            keyword = self._re_keyword_def.search(line)
            if keyword:
                keywords[keyword.group(1).strip()] = keyword.group(2).strip()

class tDatSplitter(ascii.DefaultSplitter):
    """Split TDAT data rows on the ``|`` (pipe) character.

    ``process_line`` is set to ``None`` so that the inherited per-line
    preprocessing hook is not applied before a row is split.
    NOTE(review): in astropy's ``DefaultSplitter`` that hook strips leading
    and trailing whitespace from each line -- confirm against the astropy
    version in use.
    """
    delimiter = '|'  # field separator used in the TDAT data section
    process_line = None  # disable the inherited line preprocessing hook

class tDatData(ascii.BaseData):
    """
    TDAT Data Reader

    Extracts the data rows of a HEASARC TDAT file, i.e. the lines found
    between the ``<DATA>`` and ``<END>`` markers.  Rows are split into
    fields by ``tDatSplitter``.
    """
    # NOTE: this pattern is not consulted by process_lines() below.
    comment = r'\s*#'
    splitter_class = tDatSplitter

    def process_lines(self, lines):
        """
        Generator yielding the rows of the data section.

        Parameters
        ----------
        lines : iterable of str
            All lines of the input table.

        Yields
        ------
        str
            Each non-blank line located after ``<DATA>`` and before
            ``<END>``, with the terminating field delimiter removed when
            present.  Iteration stops at the first ``<END>`` marker.
        """
        delimiter = self.splitter.delimiter
        in_data = False
        for line in lines:
            # Compare markers with surrounding blanks removed so that stray
            # trailing whitespace does not hide the data section.
            marker = line.strip()
            if marker == "<END>":
                return
            if marker == "<DATA>":
                in_data = True
            elif in_data:
                if not marker:
                    # A blank line carries no fields; skip it rather than
                    # hand an empty row to the splitter.
                    continue
                # Rows end with a delimiter; drop it so the splitter does not
                # see a spurious empty last column.  Rows lacking it are
                # passed through intact instead of losing their last character.
                if delimiter and line.endswith(delimiter):
                    line = line[:-len(delimiter)]
                yield line


class tDat(ascii.BaseReader):
    """HEASARC TDAT table reader.

    The header is taken from the lines between the ``<HEADER>`` and
    ``<DATA>`` markers, the rows from the lines between ``<DATA>`` and
    ``<END>``.  Columns are declared with ``field[name] = type // description``
    lines, and row values are separated by ``|``.

    Example table::

      <HEADER>
      table_name = example
      field[name] = char10 // Object name
      field[ra] = float8 // Right ascension
      <DATA>
      obj1|10.5|
      obj2|20.25|
      <END>
    """
    _format_name = 'tdat'
    _description = 'Basic table with custom delimiters'
    _io_registry_format_aliases = ['heasarctdat']

    header_class = tDatHeader
    data_class = tDatData

    def read(self, table):
        """
        Read ``table`` with the base reader and return the resulting table.

        The result is also kept on the reader as ``self.table``, and the
        header's column list is exposed as ``self.cols``.
        """
        self.table = ascii.BaseReader.read(self, table)
        self.cols = self.header.cols
        return self.table