# File-viewer metadata: 161 lines, 5.8 KiB, Python
import pandas as pd
|
||
import os
|
||
import re
|
||
from pathlib import Path
|
||
import matplotlib.pyplot as plt
|
||
from matplotlib import dates
|
||
from datetime import datetime, timedelta
|
||
import sys
|
||
import seaborn as sns
|
||
|
||
tstamp_s = '%d.%m.%Y %H:%M:%S.%f'
|
||
ox_dtime_format = '%H:%M'
|
||
ox_date_format = '%d.%m.%Y'
|
||
|
||
path_itog_brd_data = '../data/brd_data/'
|
||
pict_name = '../plots/' + 'MVN_wheel'
|
||
font = 16
|
||
|
||
|
||
class PathFileNotFound(Exception):
|
||
pass
|
||
|
||
|
||
def find_required_files(root_dir, pattern):
|
||
result = []
|
||
for dirpath, _, filenames in os.walk(root_dir):
|
||
for filename in filenames:
|
||
match = re.match(pattern, filename)
|
||
if match:
|
||
result.append(dirpath + '/' + filename)
|
||
|
||
if len(result) == 0:
|
||
raise PathFileNotFound(
|
||
f'error: check that the path is correct ({root_dir}) or files pattern is correct ({pattern})')
|
||
|
||
return sorted(result)
|
||
|
||
|
||
def read_files_into_df(fname_list, column_list, dtype_columns={}):
|
||
data_itog = pd.DataFrame()
|
||
epoch_start = pd.Timestamp('2000-01-01')
|
||
|
||
for fname in fname_list:
|
||
data = pd.read_csv(fname, sep=r'\s+', dtype=str)
|
||
data = data.dropna()
|
||
data = data[column_list]
|
||
|
||
if 'TIME' in column_list:
|
||
# convert TIME value to human-readable timestamp (sinse epoch 01.01.2000)
|
||
time = data['TIME'].astype(float)
|
||
tstamp = epoch_start + pd.to_timedelta(time, unit='s')
|
||
timestamp = tstamp.dt.strftime(tstamp_s)
|
||
data['timestamp'] = timestamp
|
||
|
||
# clear dataframe rows where time value == 0
|
||
data['time'] = time
|
||
data_clear = data.query('time != 0.0')
|
||
|
||
data_itog = pd.concat([data_itog, data_clear], ignore_index=True)
|
||
|
||
return data_itog
|
||
|
||
|
||
def collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog):
|
||
patterns_wheel = [r'mvn_wheel_brd01_(.*)', r'mvn_wheel_brd02_(.*)', r'mvn_wheel_brd03_(.*)',
|
||
r'mvn_wheel_brd04_(.*)']
|
||
|
||
for pattern in patterns_wheel:
|
||
fname = path_itog_brd_data + pattern[:15] + '.csv'
|
||
try:
|
||
found_files = find_required_files(root_dir_wheel_data, pattern)
|
||
data = read_files_into_df(found_files, column_list, dtype_columns={0: float, 1: int})
|
||
except KeyError as e:
|
||
print(
|
||
f'error in collect_tm_brd_wheel_data: the specified column name was not found in the data file (path: {root_dir_tm_data}) ({e})')
|
||
break
|
||
except Exception as e:
|
||
print(f'error in collect_tm_brd_wheel_data: {e}')
|
||
break
|
||
|
||
mask = data['STATE'] == '0'
|
||
data = data[mask]
|
||
data.to_csv(fname, index=False, sep=';', columns=column_list_itog, encoding='utf-8-sig')
|
||
print('data saved: ' + fname)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
|
||
if len(sys.argv) != 2:
|
||
print("Usage: python tm_wheel_parser.py /path/to/tm_brd_data/")
|
||
else:
|
||
root_dir_wheel_data = sys.argv[1]
|
||
|
||
### collect raw tm wheel data into one file for each brd ###
|
||
print('collect raw tm wheel data into one file for each brd')
|
||
|
||
column_list = ['TIME', 'STATE']
|
||
column_list_itog = ['TIME', 'timestamp', 'STATE']
|
||
|
||
# collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog)
|
||
|
||
## parse and plot wheel csv data
|
||
print('parse and plot wheel csv data')
|
||
|
||
border_clr_wheel = 1
|
||
fname = path_itog_brd_data + 'mvn_wheel_brd01.csv'
|
||
wheel_df = pd.read_csv(fname, sep=';', parse_dates=['timestamp'], date_format="%d.%m.%Y %H:%M:%S.%f")
|
||
## diff between 0 and 0 - 30 sec
|
||
wheel_df['TIME_diff'] = wheel_df['TIME'].diff()
|
||
|
||
## sampling decimation in order to get period 60 sec
|
||
wheel_df = wheel_df.iloc[1::2]
|
||
wheel_df['TIME_period'] = wheel_df['TIME'].diff()
|
||
# print(wheel_df)
|
||
median_tdiff = wheel_df['TIME_period'].median()
|
||
# print(median_tdiff)
|
||
|
||
## discard outliers of the measured wheel period
|
||
wheel_df = wheel_df[
|
||
(wheel_df['TIME_period'] > median_tdiff - border_clr_wheel) &
|
||
(wheel_df['TIME_period'] < median_tdiff + border_clr_wheel)]
|
||
|
||
median = wheel_df['TIME_period'].median()
|
||
|
||
rows, cols = wheel_df.shape
|
||
median = pd.Series([median] * rows)
|
||
|
||
date_format = dates.DateFormatter(ox_date_format)
|
||
datetime_format = dates.DateFormatter(ox_dtime_format)
|
||
|
||
fig, axes = plt.subplots(3, 1, figsize=(18, 20), dpi=300, height_ratios=[1, 1, 1])
|
||
|
||
axes[0].plot(wheel_df['timestamp'], wheel_df['TIME_period'], '.',
|
||
markersize=5)
|
||
axes[0].plot(wheel_df['timestamp'], median)
|
||
axes[0].set_title("")
|
||
axes[0].set_xlabel("Время (ДД.MM.ГГГГ)", fontsize=font)
|
||
axes[0].set_ylabel("Полупериод, сек", fontsize=font)
|
||
axes[0].grid(True)
|
||
axes[0].xaxis.set_major_formatter(date_format)
|
||
axes[0].tick_params(axis="both", width=1, labelsize=font)
|
||
|
||
axes[1].plot(wheel_df['timestamp'][0:400], wheel_df['TIME_period'][0:400], '.', markersize=10)
|
||
axes[1].set_title("")
|
||
axes[1].set_xlabel("Время (ЧЧ:ММ)", fontsize=font)
|
||
axes[1].set_ylabel("Полупериод, сек", fontsize=font)
|
||
axes[1].grid(True)
|
||
axes[1].xaxis.set_major_formatter(datetime_format)
|
||
axes[1].tick_params(axis="both", width=1, labelsize=font)
|
||
|
||
sns.histplot(wheel_df['TIME_period'], kde=False, bins=300, ax=axes[2], color='red')
|
||
axes[2].set_title("")
|
||
axes[2].set_xlabel("Полупериод, сек", fontsize=font)
|
||
axes[2].set_ylabel("Частота встречаемости", fontsize=font)
|
||
axes[2].grid(True)
|
||
axes[2].tick_params(axis="both", width=1, labelsize=font)
|
||
|
||
fig.savefig(pict_name)
|
||
|
||
plt.show()
|