srg/heasarc/tdat.py
2024-04-25 17:50:51 +03:00

166 lines
5.0 KiB
Python

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""HEASARC TDAT simple reader.
tDat.py:
HEASARC TDAT read functionality for simple character
delimited files with various options for column header definition.
:Copyright: Space Research Institute (IKI), Moscow, Russia (2019)
:Author: Roman Krivonos (krivonos@cosmos.ru)
"""
import re
import sys
import astropy
from astropy.io import ascii
from collections import OrderedDict
class tDatHeader(ascii.BaseHeader):
    """Header reader for HEASARC TDAT tables.

    Only the lines between the ``<HEADER>`` and ``<DATA>`` markers are
    treated as header content.  Inside that region two kinds of lines are
    recognised:

    * ``field[name] = type // description`` -- declares a column;
    * ``keyword = value`` -- a table-level keyword.

    Lines matching `comment` are ignored everywhere.
    """

    # Lines whose first non-blank character is "#" are comments.
    comment = r'^\s*#.*'

    # Column declaration.  Groups: (1) column name, (2) type specification,
    # (3) free-text description following "// ".
    # NOTE(review): a field line without a trailing "// description" does not
    # match and is therefore not registered as a column -- confirm that every
    # TDAT file this reader must handle carries a description on each field.
    _re_field_def = re.compile(
        r'^field\[([\w0-9\_]+)\]\s*=\s*([\s*\w*\/\:\_\-\.\[\]\;\(\)\^]+)\s*\/\/ (.*)$'
    )

    # Table-level keyword.  Groups: (1) keyword name, (2) value.
    _re_keyword_def = re.compile(r'^([\w0-9\_]+)\s*=\s*(.*)$')

    def __init__(self):
        super().__init__()

    def process_lines(self, lines):
        """Yield the non-comment lines of the ``<HEADER>`` section.

        Parameters
        ----------
        lines : iterable of str
            All lines of the input table.

        Yields
        ------
        str
            Each line located after the ``<HEADER>`` marker and before the
            ``<DATA>`` marker that does not match `comment`.
        """
        re_comment = re.compile(self.comment)
        in_header = False
        for line in lines:
            if re_comment.match(line):
                continue
            if line == "<DATA>":
                # The data section starts here; nothing after it is header.
                return
            if in_header:
                yield line
            elif line == "<HEADER>":
                in_header = True

    def get_cols(self, lines):
        """Initialise the column list from the ``field[...]`` declarations.

        Sets ``self.names`` to the declared column names, in file order, and
        then calls ``self._set_cols_from_names()``.

        Parameters
        ----------
        lines : iterable of str
            All lines of the input table.
        """
        self.names = [
            match.group(1)
            for match in map(self._re_field_def.search, self.process_lines(lines))
            if match
        ]
        self._set_cols_from_names()

    def update_meta(self, lines, meta):
        """Extract table-level keywords and column details for a TDAT table.

        Fills ``meta['table']`` with three entries:

        * ``'comments'`` -- an empty list;
        * ``'keywords'`` -- ordered mapping of ``keyword = value`` lines;
        * ``'cols'`` -- ordered mapping of column name to a dict with the
          ``"type"`` and ``"description"`` taken from its ``field[...]`` line.

        Parameters
        ----------
        lines : iterable of str
            All lines of the input table.
        meta : dict
            Metadata container; its ``'table'`` entry is updated in place.
        """
        table_meta = meta['table']
        table_meta['comments'] = []
        keywords = table_meta['keywords'] = OrderedDict()
        cols = table_meta['cols'] = OrderedDict()

        for line in self.process_lines(lines):
            match = self._re_field_def.search(line)
            if match:
                name, type_spec, description = (
                    group.strip() for group in match.groups()
                )
                cols[name] = {"type": type_spec, "description": description}
                continue

            # Anything that is not a field declaration may be a keyword line.
            match = self._re_keyword_def.search(line)
            if match:
                keywords[match.group(1).strip()] = match.group(2).strip()
class tDatSplitter(ascii.DefaultSplitter):
    """Split TDAT data rows on the ``|`` character."""

    # No per-line preprocessing hook is installed.
    # NOTE(review): presumably this keeps whitespace inside each row intact
    # before splitting -- confirm against astropy's DefaultSplitter.
    process_line = None

    # TDAT fields are separated by a vertical bar.
    delimiter = '|'
class tDatData(ascii.BaseData):
    """Data reader for the ``<DATA>`` section of a TDAT table.

    Rows are the lines found after the ``<DATA>`` marker and before the
    ``<END>`` marker; they are split into fields by `tDatSplitter`.
    """

    splitter_class = tDatSplitter

    # Comment pattern; `process_lines` below does not consult it.
    comment = r'\s*#'

    def process_lines(self, lines):
        """Yield the rows of the data section with their last character removed.

        Parameters
        ----------
        lines : iterable of str
            All lines of the input table.

        Yields
        ------
        str
            Every line after a ``<DATA>`` marker, minus its final character.
            Iteration stops at the first ``<END>`` line, wherever it occurs.
        """
        reading = False
        for row in lines:
            if row == "<END>":
                break
            if row == "<DATA>":
                reading = True
                continue
            if reading:
                # NOTE(review): the dropped character is presumably the "|"
                # that terminates each TDAT record -- confirm with the format
                # specification.
                yield row[:-1]
class tDat(ascii.BaseReader):
    r"""Reader for HEASARC TDAT tables.

    The header is parsed by `tDatHeader` and the rows by `tDatData`.
    Lines whose first non-whitespace character is ``#`` are skipped while
    reading the header.

    Example of a table this reader accepts::

      <HEADER>
      # Keyword lines and column declarations live in the header section
      table_name = example
      field[name] = char12 // Source name
      field[flux] = float8 // Measured flux
      <DATA>
      src1|10.5|
      src2|20.25|
      <END>
    """

    _format_name = 'tdat'
    _io_registry_format_aliases = ['heasarctdat']
    _description = 'Basic table with custom delimiters'

    data_class = tDatData
    header_class = tDatHeader

    def read(self, table):
        """Read ``table`` and return the result of the base reader.

        The returned table is also stored on ``self.table``, and the header's
        column list is exposed as ``self.cols``.

        Parameters
        ----------
        table : object
            Input passed unchanged to ``ascii.BaseReader.read``.

        Returns
        -------
        object
            Whatever ``ascii.BaseReader.read`` returns for ``table``.
        """
        self.table = super().read(table)
        self.cols = self.header.cols
        return self.table