"""Collect 'mvn_wheel_brdNN_*' telemetry files into per-board CSV files and plot wheel periods.

Usage: python tm_wheel_parser.py /path/to/tm_brd_data/
"""
import os
import re
import sys
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd

# NOTE: matplotlib and seaborn are imported inside _plot_wheel_periods so that
# the collection helpers below can be imported without the plotting stack.

# strftime/strptime format of the 'timestamp' column stored in the CSV files
tstamp_s = '%d.%m.%Y %H:%M:%S.%f'
# tick label formats for the time axes of the plots
ox_dtime_format = '%H:%M'
ox_date_format = '%d.%m.%Y'
# directory holding the per-board CSV files (relative to the working directory)
path_itog_brd_data = '../data/brd_data/'
# path of the saved figure (passed to fig.savefig as is)
pict_name = '../plots/' + 'MVN_wheel'
# font size used for axis labels and tick labels
font = 16


class PathFileNotFound(Exception):
    """Raised by find_required_files when no file name matches the pattern."""


def find_required_files(root_dir, pattern):
    """Return the sorted paths of files under *root_dir* whose name matches *pattern*.

    The directory tree is walked recursively and *pattern* is applied to the
    bare file name with ``re.match`` (i.e. anchored at the start of the name).

    Raises:
        PathFileNotFound: if no file matched (this also covers a root
            directory that yields no entries at all).
    """
    matcher = re.compile(pattern)
    result = []
    for dirpath, _, filenames in os.walk(root_dir):
        result.extend(
            os.path.join(dirpath, name) for name in filenames if matcher.match(name)
        )

    if not result:
        raise PathFileNotFound(
            f'error: check that the path is correct ({root_dir}) or files pattern is correct ({pattern})')

    return sorted(result)


def read_files_into_df(fname_list, column_list, dtype_columns=None):
    """Read whitespace-separated files and stack the requested columns into one frame.

    Every file is read with all values as strings, rows containing any missing
    value are dropped, and only *column_list* is kept. When 'TIME' is among
    the requested columns, two columns are added:

    * 'timestamp' - TIME interpreted as seconds since 2000-01-01 and
      formatted with ``tstamp_s``;
    * 'time' - TIME converted to float;

    and rows whose time equals 0 are discarded. When 'TIME' is not requested
    the rows are kept unfiltered.

    Args:
        fname_list: iterable of file paths to read.
        column_list: column names to keep; a missing name raises KeyError.
        dtype_columns: unused by this function; kept so existing calls that
            pass it continue to work.

    Returns:
        A DataFrame with a fresh 0..n-1 index (empty if *fname_list* is empty).
    """
    epoch_start = pd.Timestamp('2000-01-01')
    with_time = 'TIME' in column_list
    frames = []

    for fname in fname_list:
        data = pd.read_csv(fname, sep=r'\s+', dtype=str)
        data = data.dropna()
        # copy() so that the columns added below are written to an
        # independent frame instead of a slice of the parsed data
        data = data[column_list].copy()

        if with_time:
            # convert TIME value to human-readable timestamp (since epoch 01.01.2000)
            seconds = data['TIME'].astype(float)
            tstamp = epoch_start + pd.to_timedelta(seconds, unit='s')
            data['timestamp'] = tstamp.dt.strftime(tstamp_s)
            data['time'] = seconds
            # clear dataframe rows where time value == 0
            data = data[seconds != 0.0]

        frames.append(data)

    if not frames:
        return pd.DataFrame()
    # a single concat instead of re-concatenating the growing frame per file
    return pd.concat(frames, ignore_index=True)


def collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog):
    """Merge the raw wheel files of each board into one CSV file per board.

    For boards 01..04 the files named 'mvn_wheel_brdNN_*' found under
    *root_dir_wheel_data* are read, only rows with STATE == '0' are kept, and
    the columns *column_list_itog* are written to
    ``path_itog_brd_data + 'mvn_wheel_brdNN.csv'`` (';' separated, utf-8-sig).

    Processing stops at the first board whose files cannot be found or read;
    the error is printed rather than raised.
    """
    boards = ['mvn_wheel_brd01', 'mvn_wheel_brd02', 'mvn_wheel_brd03', 'mvn_wheel_brd04']

    for board in boards:
        pattern = board + r'_(.*)'
        fname = path_itog_brd_data + board + '.csv'
        try:
            found_files = find_required_files(root_dir_wheel_data, pattern)
            data = read_files_into_df(found_files, column_list, dtype_columns={0: float, 1: int})
        except KeyError as e:
            # the original message referenced an undefined name here, which
            # turned a missing column into a NameError
            print(
                f'error in collect_tm_brd_wheel_data: the specified column name was not found in the data file (path: {root_dir_wheel_data}) ({e})')
            break
        except Exception as e:
            print(f'error in collect_tm_brd_wheel_data: {e}')
            break

        # values were read as strings, hence the comparison with '0'
        data = data[data['STATE'] == '0']
        data.to_csv(fname, index=False, sep=';', columns=column_list_itog, encoding='utf-8-sig')
        print('data saved: ' + fname)


def _load_wheel_periods(fname, border_clr_wheel=1):
    """Load a per-board CSV and return the rows whose period is close to the median.

    'TIME_diff' is the difference between consecutive TIME values. Every
    second row (starting with the second one) is then kept and 'TIME_period'
    is the TIME difference between the kept rows. Rows whose period lies
    outside the open interval median +/- *border_clr_wheel* are discarded.
    """
    wheel_df = pd.read_csv(fname, sep=';', parse_dates=['timestamp'], date_format=tstamp_s)

    # diff between 0 and 0 - 30 sec (original author's note)
    wheel_df['TIME_diff'] = wheel_df['TIME'].diff()

    # sampling decimation in order to get period 60 sec (original author's note);
    # copy() so the new column is not written to a slice of the loaded frame
    wheel_df = wheel_df.iloc[1::2].copy()
    wheel_df['TIME_period'] = wheel_df['TIME'].diff()

    # discard outliers of the measured wheel period
    median_tdiff = wheel_df['TIME_period'].median()
    in_band = (
        (wheel_df['TIME_period'] > median_tdiff - border_clr_wheel)
        & (wheel_df['TIME_period'] < median_tdiff + border_clr_wheel)
    )
    return wheel_df[in_band]


def _style_axis(ax, xlabel, ylabel, formatter=None):
    """Apply the labels, grid and tick settings shared by all three subplots."""
    ax.set_title("")
    ax.set_xlabel(xlabel, fontsize=font)
    ax.set_ylabel(ylabel, fontsize=font)
    ax.grid(True)
    if formatter is not None:
        ax.xaxis.set_major_formatter(formatter)
    ax.tick_params(axis="both", width=1, labelsize=font)


def _plot_wheel_periods(wheel_df, out_path):
    """Plot the periods over time, their first 400 points and their histogram.

    The figure is saved to *out_path* and then shown.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    from matplotlib import dates

    stamps = wheel_df['timestamp']
    period = wheel_df['TIME_period']
    # constant line at the median of the filtered periods
    median_line = [period.median()] * len(wheel_df)

    date_format = dates.DateFormatter(ox_date_format)
    datetime_format = dates.DateFormatter(ox_dtime_format)

    fig, axes = plt.subplots(3, 1, figsize=(18, 20), dpi=300, height_ratios=[1, 1, 1])

    axes[0].plot(stamps, period, '.', markersize=5)
    axes[0].plot(stamps, median_line)
    _style_axis(axes[0], "Время (ДД.MM.ГГГГ)", "Полупериод, сек", date_format)

    # explicit positional slice of the first 400 samples
    axes[1].plot(stamps.iloc[:400], period.iloc[:400], '.', markersize=10)
    _style_axis(axes[1], "Время (ЧЧ:ММ)", "Полупериод, сек", datetime_format)

    sns.histplot(period, kde=False, bins=300, ax=axes[2], color='red')
    _style_axis(axes[2], "Полупериод, сек", "Частота встречаемости")

    fig.savefig(out_path)
    plt.show()


def main(argv=None):
    """Script entry point: check the arguments, then load and plot board 01 data."""
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print("Usage: python tm_wheel_parser.py /path/to/tm_brd_data/")
        return

    root_dir_wheel_data = argv[1]

    ### collect raw tm wheel data into one file for each brd ###
    print('collect raw tm wheel data into one file for each brd')
    column_list = ['TIME', 'STATE']
    column_list_itog = ['TIME', 'timestamp', 'STATE']
    # The collection step is disabled here, so the per-board CSV read below
    # must already exist; uncomment to regenerate it from the raw files.
    # collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog)

    ## parse and plot wheel csv data
    print('parse and plot wheel csv data')
    border_clr_wheel = 1
    fname = path_itog_brd_data + 'mvn_wheel_brd01.csv'
    wheel_df = _load_wheel_periods(fname, border_clr_wheel)
    _plot_wheel_periods(wheel_df, pict_name)


if __name__ == "__main__":
    main()