# mvn_flight/bin/brd_wheel_1Hz_parser.py

import pandas as pd
import os
import re
from pathlib import Path
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# strftime/strptime layout of the 'timestamp' column: day.month.year, with microseconds
tstamp_s = '%d.%m.%Y %H:%M:%S.%f'
# minute-resolution date/time layout; not referenced in the visible part of this file
ox_dtime_format = '%d.%m.%Y %H:%M'

# directory of the merged per-board CSV files (relative path, resolved against the working directory)
path_itog_brd_data = '../data/brd_data/'

class PathFileNotFound(Exception):
    """Raised when a directory scan yields no file name matching the pattern."""

def find_required_files(root_dir, pattern):
    """Recursively collect the files under ``root_dir`` whose name matches ``pattern``.

    Args:
        root_dir: Directory to walk; nested subdirectories are visited too.
        pattern: Regular expression applied with ``re.match``, i.e. anchored
            at the start of the bare file name (not of the full path).

    Returns:
        Sorted list of paths, each being the containing directory joined
        with the matching file name.

    Raises:
        PathFileNotFound: If no file name matched. A ``root_dir`` that does
            not exist ends up here as well, since ``os.walk`` then yields
            nothing.
    """
    name_re = re.compile(pattern)  # compile once instead of once per file name
    result = []
    for dirpath, _, filenames in os.walk(root_dir):
        # os.path.join avoids the doubled separator that plain string
        # concatenation produced whenever root_dir ended with '/'.
        result.extend(
            os.path.join(dirpath, filename)
            for filename in filenames
            if name_re.match(filename)
        )

    if not result:
        raise PathFileNotFound(f'error: check that the path is correct ({root_dir}) or files pattern is correct ({pattern})')

    return sorted(result)

def read_files_into_df(fname_list, column_list, dtype_columns=None):
    """Read whitespace-separated text files and stack the selected columns.

    Each file is read with every value as a string, rows that have a missing
    value in any column of the file are dropped, and only ``column_list`` is
    kept. When ``'TIME'`` is among the requested columns, two columns are
    added and the rows are filtered:

    * ``timestamp`` -- ``TIME`` (seconds since 2000-01-01) formatted with
      the module-level ``tstamp_s`` layout;
    * ``time`` -- ``TIME`` converted to float; rows where it equals 0 are
      discarded.

    Without ``'TIME'`` the selected columns are stacked unchanged.

    Args:
        fname_list: Paths of the files to read, in stacking order.
        column_list: Column names to keep from each file.
        dtype_columns: Accepted for backward compatibility; not used.

    Returns:
        One ``DataFrame`` with a fresh 0..n-1 index; an empty ``DataFrame``
        when ``fname_list`` is empty.

    Raises:
        KeyError: If a name in ``column_list`` is absent from a file.
    """
    epoch_start = pd.Timestamp('2000-01-01')
    frames = []

    for fname in fname_list:
        data = pd.read_csv(fname, sep=r'\s+', dtype=str)
        # copy() gives an independent frame, so the column assignments below
        # do not write into a slice of the frame returned by dropna().
        data = data.dropna()[column_list].copy()

        if 'TIME' in column_list:
            # convert TIME value to human-readable timestamp (since epoch 01.01.2000)
            time = data['TIME'].astype(float)
            tstamp = epoch_start + pd.to_timedelta(time, unit='s')
            data['timestamp'] = tstamp.dt.strftime(tstamp_s)

            # drop the rows whose time value is exactly 0
            data['time'] = time
            data = data[time != 0.0]

        frames.append(data)

    if not frames:
        return pd.DataFrame()

    # single concat at the end instead of re-copying the accumulator per file
    return pd.concat(frames, ignore_index=True)


def collect_tm_brd_files(root_dir_tm_data, column_list, column_list_itog):
    """Merge the raw TM files of each board (brd01..brd04) into one CSV per board.

    For every board the files under ``root_dir_tm_data`` whose name starts
    with ``mvn_tm_brdNN_`` are located, read via ``read_files_into_df`` and
    written to ``path_itog_brd_data`` as ``mvn_tm_brdNN.csv`` (``;``
    separated, UTF-8 with BOM).

    Args:
        root_dir_tm_data: Directory tree holding the raw TM text files.
        column_list: Columns to read from the raw files.
        column_list_itog: Columns (and their order) written to the CSV.

    The loop stops at the first board whose lookup or read fails: the error
    is printed and the remaining boards are not processed.
    """
    patterns_tm = [rf'mvn_tm_brd{brd:02d}_(.*)' for brd in range(1, 5)]

    for pattern in patterns_tm:
        # the first 12 characters of the pattern are the board prefix, e.g. 'mvn_tm_brd01'
        out_name = f'{path_itog_brd_data}{pattern[:12]}.csv'
        try:
            matched = find_required_files(root_dir_tm_data, pattern)
            merged = read_files_into_df(matched, column_list, dtype_columns={11: float})
        except KeyError as e:
            print(f'error in collect_tm_brd_files: the specified column name was not found in the data file (path: {root_dir_tm_data}) ({e})')
            break
        except Exception as e:
            print(f'error in collect_tm_brd_files: {e}')
            break
        else:
            merged.to_csv(out_name, index=False, sep=';', columns=column_list_itog, encoding='utf-8-sig')
            print('data saved: ' + out_name)


def collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog):
    """Merge the raw wheel files of each board (brd01..brd04) into one CSV per board.

    For every board the files under ``root_dir_wheel_data`` whose name
    starts with ``mvn_wheel_brdNN_`` are located, read via
    ``read_files_into_df``, reduced to the rows whose ``STATE`` is ``'0'``
    and written to ``path_itog_brd_data`` as ``mvn_wheel_brdNN.csv`` (``;``
    separated, UTF-8 with BOM).

    Args:
        root_dir_wheel_data: Directory tree holding the raw wheel text files.
        column_list: Columns to read from the raw files; must include
            ``'STATE'`` because the rows are filtered on it.
        column_list_itog: Columns (and their order) written to the CSV.

    The loop stops at the first board whose lookup or read fails: the error
    is printed and the remaining boards are not processed.
    """
    patterns_wheel = [r'mvn_wheel_brd01_(.*)', r'mvn_wheel_brd02_(.*)', r'mvn_wheel_brd03_(.*)',
        r'mvn_wheel_brd04_(.*)']

    for pattern in patterns_wheel:
        # the first 15 characters of the pattern are the board prefix, e.g. 'mvn_wheel_brd01'
        fname = path_itog_brd_data + pattern[:15] + '.csv'
        try:
            found_files = find_required_files(root_dir_wheel_data, pattern)
            data = read_files_into_df(found_files, column_list, dtype_columns={0: float, 1: int})
        except KeyError as e:
            # Report this function's own directory; the message used to name
            # root_dir_tm_data, which is not a parameter here.
            print(f'error in collect_tm_brd_wheel_data: the specified column name was not found in the data file (path: {root_dir_wheel_data}) ({e})')
            break
        except Exception as e:
            print(f'error in collect_tm_brd_wheel_data: {e}')
            break

        # values are read as strings, hence the comparison with '0' rather than 0
        mask = data['STATE'] == '0'
        data = data[mask]
        data.to_csv(fname, index=False, sep=';', columns=column_list_itog, encoding='utf-8-sig')
        print('data saved: ' + fname)


## collect raw tm brd data into one file for each brd
# NOTE: these statements run at import time and read from machine-specific absolute paths.

root_dir_tm_data = '/home/danila/Danila/work/MVN/flight/brd_data/arch_for_MB/archive_tm_data_txt/'
# columns taken from the raw TM files
column_list = ['TIME', 'PER_1Hz', 'ST_HV']
# columns written to the merged CSV ('timestamp' is derived from TIME while reading)
column_list_itog = ['TIME', 'timestamp', 'PER_1Hz', 'ST_HV']

collect_tm_brd_files(root_dir_tm_data, column_list, column_list_itog)


## collect raw wheel brd data into one file for each brd
root_dir_wheel_data = '/home/danila/Danila/work/MVN/flight/brd_data/arch_for_MB/archive_wheel_data_txt/'
# column_list / column_list_itog are rebound here for the wheel files
column_list = ['TIME', 'STATE']
column_list_itog = ['TIME', 'timestamp', 'STATE']

collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog)


## plot 'evolution' 1 Hz from tm brd data

fname = path_itog_brd_data + 'mvn_tm_brd01.csv'
df = pd.read_csv(fname, sep=';')
# Parse the timestamps explicitly: read_csv's date_parser argument is
# deprecated since pandas 2.0, while to_datetime(format=...) works on old and
# new versions alike.
df['timestamp'] = pd.to_datetime(df['timestamp'], format=tstamp_s)

# PER_1Hz of board 01 against time, one dot per record
plt.plot(df['timestamp'], df['PER_1Hz'], '.')
plt.show()


# half-width of the accepted band around the median TIME step (same units as TIME)
border_clr_wheel = 2
fname = path_itog_brd_data + 'mvn_wheel_brd01.csv'
wheel_df = pd.read_csv(fname, sep=';')
# step between consecutive TIME values; the first row has no predecessor, so it is NaN
wheel_df['TIME_diff'] = wheel_df['TIME'].diff()
median_tdiff = wheel_df['TIME_diff'].median()

# rows whose step lies strictly inside the band median +/- border_clr_wheel
wheel_df_clear = wheel_df[(wheel_df['TIME_diff'] > median_tdiff - border_clr_wheel) &
        (wheel_df['TIME_diff'] < median_tdiff + border_clr_wheel)]

# rows whose step lies on or outside the band; the NaN first row falls into neither subset
# (wheel_df_peaks is not used further in the visible part of this file)
wheel_df_peaks = wheel_df[(wheel_df['TIME_diff'] <= median_tdiff - border_clr_wheel) |
        (wheel_df['TIME_diff'] >= median_tdiff + border_clr_wheel)]


# in-band TIME steps of board 01 against TIME
plt.plot(wheel_df_clear['TIME'], wheel_df_clear['TIME_diff'])
plt.show()


# df1 = df[df['TIME_diff'] < 30.6]
# print(df[df['TIME_diff'] > 30.6 or df['TIME_diff'] < 29.4] )

# for idx, row in df.iterrows():
#     print(row['TIME'])