# mvn_flight/bin/tm_wheel_parser.py

import pandas as pd
import os
import re
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib import dates
from datetime import datetime, timedelta
import sys
import seaborn as sns

# --- strftime layouts ---
# full timestamp text written into the 'timestamp' column
tstamp_s = '%d.%m.%Y %H:%M:%S.%f'
# x-axis tick labels: time of day / calendar date
ox_dtime_format = '%H:%M'
ox_date_format = '%d.%m.%Y'

# --- output locations and plot appearance ---
# directory holding the merged per-board CSV files
path_itog_brd_data = '../data/brd_data/'
# path (without extension) the figure is saved to
pict_name = '../plots/MVN_wheel'
# font size used for axis labels and tick labels
font = 16


class PathFileNotFound(Exception):
    """Signals that a directory scan found no file matching the pattern."""


def find_required_files(root_dir, pattern):
    """Walk a directory tree and return the files whose names match a regex.

    Args:
        root_dir: directory to scan recursively.
        pattern: regular expression applied with ``re.match``, i.e. anchored
            at the start of the bare file name (not the full path).

    Returns:
        Sorted list of ``'<dirpath>/<filename>'`` strings.

    Raises:
        PathFileNotFound: if no file name in the tree matches ``pattern``.
    """
    matched_paths = [
        dirpath + '/' + name
        for dirpath, _, names in os.walk(root_dir)
        for name in names
        if re.match(pattern, name)
    ]

    if not matched_paths:
        raise PathFileNotFound(
            f'error: check that the path is correct ({root_dir}) or files pattern is correct ({pattern})')

    matched_paths.sort()
    return matched_paths


def read_files_into_df(fname_list, column_list, dtype_columns=None):
    """Read whitespace-separated data files and stack the selected columns.

    Every file is read with all values as strings, rows containing any missing
    value are dropped, and only the ``column_list`` columns are kept. When
    ``'TIME'`` is among the requested columns it is interpreted as seconds
    since 2000-01-01 and two columns are added: ``timestamp`` (text formatted
    with the module-level ``tstamp_s`` layout) and ``time`` (TIME as float);
    rows whose time equals 0 are then discarded. Without ``'TIME'`` the
    selected columns are returned unchanged.

    Args:
        fname_list: paths of the files to read, processed in the given order.
        column_list: names of the columns to keep from each file.
        dtype_columns: accepted for backward compatibility; currently unused.

    Returns:
        DataFrame with the rows of all files concatenated under a fresh
        0..n-1 index; an empty DataFrame when ``fname_list`` is empty.

    Raises:
        KeyError: if a file lacks one of the ``column_list`` columns.
    """
    epoch_start = pd.Timestamp('2000-01-01')
    frames = []

    for fname in fname_list:
        data = pd.read_csv(fname, sep=r'\s+', dtype=str)
        # NaN filtering runs before column selection, so a gap in any column
        # of the file removes the whole row
        data = data.dropna()
        # explicit copy: new columns are assigned below and must not target a
        # selection of the frame returned by read_csv
        data = data[column_list].copy()

        if 'TIME' in column_list:
            # convert TIME value to a human-readable timestamp
            # (seconds since epoch 01.01.2000)
            time = data['TIME'].astype(float)
            tstamp = epoch_start + pd.to_timedelta(time, unit='s')
            data['timestamp'] = tstamp.dt.strftime(tstamp_s)

            # drop rows where the time value is exactly 0
            data['time'] = time
            data = data[data['time'] != 0.0]

        frames.append(data)

    if not frames:
        return pd.DataFrame()

    # a single concat at the end avoids re-copying the accumulated frame
    # once per input file
    return pd.concat(frames, ignore_index=True)


def collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog):
    """Merge the raw wheel files of each board into one CSV file per board.

    For boards brd01..brd04 the directory tree is searched for files named
    ``mvn_wheel_<board>_*``; their contents are combined, only rows with
    ``STATE == '0'`` are kept, and the result is written to
    ``<path_itog_brd_data>mvn_wheel_<board>.csv`` (``;``-separated,
    UTF-8 with BOM).

    Args:
        root_dir_wheel_data: root directory scanned recursively for raw files.
        column_list: columns to read from the raw files; must include
            ``'STATE'`` because the rows are filtered on it.
        column_list_itog: columns written to the output CSV.

    Errors raised while locating or reading files are printed, and processing
    stops at that board: the remaining boards are not handled.
    """
    boards = ('brd01', 'brd02', 'brd03', 'brd04')

    for board in boards:
        base_name = f'mvn_wheel_{board}'
        pattern = base_name + r'_(.*)'
        fname = path_itog_brd_data + base_name + '.csv'
        try:
            found_files = find_required_files(root_dir_wheel_data, pattern)
            data = read_files_into_df(found_files, column_list, dtype_columns={0: float, 1: int})
        except KeyError as e:
            # report the directory that was actually scanned; the previous
            # message referenced an undefined name and raised NameError here
            print(
                f'error in collect_tm_brd_wheel_data: the specified column name was not found in the data file (path: {root_dir_wheel_data}) ({e})')
            break
        except Exception as e:
            print(f'error in collect_tm_brd_wheel_data: {e}')
            break

        # all values are read as strings, hence the comparison with '0'
        data = data[data['STATE'] == '0']
        data.to_csv(fname, index=False, sep=';', columns=column_list_itog, encoding='utf-8-sig')
        print('data saved: ' + fname)


# Script entry point: expects exactly one argument, the root directory of the
# raw wheel telemetry data, then plots the measured wheel period for brd01.
if __name__ == "__main__":

    if len(sys.argv) != 2:
        print("Usage: python tm_wheel_parser.py /path/to/tm_brd_data/")
    else:
        root_dir_wheel_data = sys.argv[1]

        # Step 1: collect raw wheel data into one CSV file per board.
        print('collect raw tm wheel data into one file for each brd')

        column_list = ['TIME', 'STATE']
        column_list_itog = ['TIME', 'timestamp', 'STATE']

        # The collection call is currently disabled, so the three variables
        # above are unused and the plotting below relies on an already
        # existing mvn_wheel_brd01.csv.
        # collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog)

        # Step 2: parse the merged CSV and plot the wheel data.
        print('parse and plot wheel csv data')

        # half-width of the accepted band around the median period
        # (same units as TIME)
        border_clr_wheel = 1
        fname = path_itog_brd_data + 'mvn_wheel_brd01.csv'
        # NOTE(review): the `date_format` keyword presumably requires
        # pandas >= 2.0 -- confirm against the deployed version.
        wheel_df = pd.read_csv(fname, sep=';', parse_dates=['timestamp'], date_format="%d.%m.%Y %H:%M:%S.%f")
        # Interval between consecutive rows (original note: spacing between
        # successive zero states, about 30 sec). Not used further below.
        wheel_df['TIME_diff'] = wheel_df['TIME'].diff()

        # Keep every second row, starting from the second one (original note:
        # decimation in order to get a 60 sec period), then measure the
        # spacing of the remaining rows.
        wheel_df = wheel_df.iloc[1::2]
        wheel_df['TIME_period'] = wheel_df['TIME'].diff()
        median_tdiff = wheel_df['TIME_period'].median()

        # Discard outliers: keep only periods strictly within
        # +/- border_clr_wheel of the median. The first row (NaN period)
        # fails both comparisons and is dropped as well.
        wheel_df = wheel_df[
                (wheel_df['TIME_period'] > median_tdiff - border_clr_wheel) &
                (wheel_df['TIME_period'] < median_tdiff + border_clr_wheel)]

        # median of the cleaned data
        median = wheel_df['TIME_period'].median()

        # constant series with one value per remaining row, drawn as a flat
        # reference line on the first plot
        rows, cols = wheel_df.shape
        median = pd.Series([median] * rows)

        # tick formatters: calendar date for the overview, HH:MM for the zoom
        date_format = dates.DateFormatter(ox_date_format)
        datetime_format = dates.DateFormatter(ox_dtime_format)

        # three stacked panels of equal height
        fig, axes = plt.subplots(3, 1, figsize=(18, 20), dpi=300, height_ratios=[1, 1, 1])

        # Panel 1: all cleaned periods over time plus the median line.
        axes[0].plot(wheel_df['timestamp'], wheel_df['TIME_period'], '.',
                markersize=5)
        axes[0].plot(wheel_df['timestamp'], median)
        axes[0].set_title("")
        axes[0].set_xlabel("Время (ДД.MM.ГГГГ)", fontsize=font)
        axes[0].set_ylabel("Полупериод, сек", fontsize=font)
        axes[0].grid(True)
        axes[0].xaxis.set_major_formatter(date_format)
        axes[0].tick_params(axis="both", width=1, labelsize=font)

        # Panel 2: zoom on the first 400 cleaned samples.
        axes[1].plot(wheel_df['timestamp'][0:400], wheel_df['TIME_period'][0:400], '.', markersize=10)
        axes[1].set_title("")
        axes[1].set_xlabel("Время (ЧЧ:ММ)", fontsize=font)
        axes[1].set_ylabel("Полупериод, сек", fontsize=font)
        axes[1].grid(True)
        axes[1].xaxis.set_major_formatter(datetime_format)
        axes[1].tick_params(axis="both", width=1, labelsize=font)

        # Panel 3: histogram of the cleaned periods (300 bins, no KDE curve).
        sns.histplot(wheel_df['TIME_period'], kde=False, bins=300, ax=axes[2], color='red')
        axes[2].set_title("")
        axes[2].set_xlabel("Полупериод, сек", fontsize=font)
        axes[2].set_ylabel("Частота встречаемости", fontsize=font)
        axes[2].grid(True)
        axes[2].tick_params(axis="both", width=1, labelsize=font)

        # pict_name carries no extension, so the file format is left to
        # matplotlib's default -- presumably PNG; confirm.
        fig.savefig(pict_name)

        plt.show()