"""Merge raw board data files (tm and wheel) into one CSV per board and plot them.

Run as a script: the raw text archives are scanned, concatenated into
``mvn_tm_brdNN.csv`` / ``mvn_wheel_brdNN.csv`` files under
``path_itog_brd_data``, and two diagnostic plots are shown for board 01.
"""
import os
import re
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; the collection functions do not need it
    plt = None

tstamp_s = '%d.%m.%Y %H:%M:%S.%f'
ox_dtime_format = '%d.%m.%Y %H:%M'

path_itog_brd_data = '../data/brd_data/'

root_dir_tm_data = '/home/danila/Danila/work/MVN/flight/brd_data/arch_for_MB/archive_tm_data_txt/'
root_dir_wheel_data = '/home/danila/Danila/work/MVN/flight/brd_data/arch_for_MB/archive_wheel_data_txt/'

# File-name prefixes of the four boards; the output CSV is named after the prefix.
_TM_BOARD_PREFIXES = ['mvn_tm_brd01', 'mvn_tm_brd02', 'mvn_tm_brd03', 'mvn_tm_brd04']
_WHEEL_BOARD_PREFIXES = ['mvn_wheel_brd01', 'mvn_wheel_brd02', 'mvn_wheel_brd03', 'mvn_wheel_brd04']


class PathFileNotFound(Exception):
    """Raised when no file under the searched directory matches the pattern."""


def find_required_files(root_dir, pattern):
    """Return the sorted paths of all files under *root_dir* matching *pattern*.

    Args:
        root_dir: directory that is walked recursively.
        pattern: regular expression applied with ``re.match`` to each bare
            file name (not to the full path).

    Returns:
        Sorted list of ``dirpath + '/' + filename`` strings.

    Raises:
        PathFileNotFound: if nothing matched (wrong directory or wrong pattern).
    """
    name_re = re.compile(pattern)
    result = []
    for dirpath, _, filenames in os.walk(root_dir):
        # The path is built by plain concatenation on purpose: the sort order
        # of this exact string defines the order in which files are merged.
        result.extend(dirpath + '/' + filename
                      for filename in filenames if name_re.match(filename))
    if not result:
        raise PathFileNotFound(f'error: check that the path is correct ({root_dir}) or files pattern is correct ({pattern})')
    return sorted(result)


def read_files_into_df(fname_list, column_list, dtype_columns=None):
    """Read whitespace-separated files and stack the selected columns.

    Every file is read with all values as strings, rows containing missing
    values are dropped, and only *column_list* is kept.  If ``'TIME'`` is one
    of the requested columns, two columns are added: ``timestamp`` (TIME
    interpreted as seconds since 2000-01-01, formatted with ``tstamp_s``) and
    ``time`` (TIME as float); rows whose time equals 0 are discarded.

    Args:
        fname_list: paths of the files to read, in the order they are stacked.
        column_list: names of the columns to keep from each file.
        dtype_columns: accepted for backward compatibility; currently unused.

    Returns:
        One DataFrame with a fresh 0..n-1 index; empty if *fname_list* is empty.

    Raises:
        KeyError: if a file lacks one of the requested columns.
    """
    epoch_start = pd.Timestamp('2000-01-01')
    frames = []
    for fname in fname_list:
        data = pd.read_csv(fname, sep=r'\s+', dtype=str)
        # .copy() so the columns added below go to an independent frame,
        # not to a slice of the parsed one.
        data = data.dropna()[column_list].copy()
        if 'TIME' in column_list:
            # convert TIME value to human-readable timestamp (since epoch 01.01.2000)
            time = data['TIME'].astype(float)
            tstamp = epoch_start + pd.to_timedelta(time, unit='s')
            data['timestamp'] = tstamp.dt.strftime(tstamp_s)
            data['time'] = time
            # clear dataframe rows where time value == 0
            data = data[time != 0.0]
        frames.append(data)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def _collect_board_data(caller, root_dir, prefixes, column_list, column_list_itog,
                        dtype_columns=None, row_filter=None):
    """Merge the raw files of each board prefix into ``<prefix>.csv``.

    Processing stops at the first board whose files cannot be found or read;
    the problem is printed, prefixed with *caller*.  *row_filter*, when given,
    is applied to the merged frame before it is written.
    """
    for prefix in prefixes:
        pattern = prefix + r'_(.*)'
        fname = path_itog_brd_data + prefix + '.csv'
        try:
            found_files = find_required_files(root_dir, pattern)
            data = read_files_into_df(found_files, column_list, dtype_columns=dtype_columns)
        except KeyError as e:
            print(f'error in {caller}: the specified column name was not found in the data file (path: {root_dir}) ({e})')
            break
        except Exception as e:
            print(f'error in {caller}: {e}')
            break
        if row_filter is not None:
            data = row_filter(data)
        data.to_csv(fname, index=False, sep=';', columns=column_list_itog, encoding='utf-8-sig')
        print('data saved: ' + fname)


def collect_tm_brd_files(root_dir_tm_data, column_list, column_list_itog):
    """Collect the raw tm files of boards 01-04 into one CSV per board.

    Args:
        root_dir_tm_data: directory searched recursively for ``mvn_tm_brdNN_*`` files.
        column_list: columns read from the raw files.
        column_list_itog: columns written to the resulting CSV.
    """
    _collect_board_data('collect_tm_brd_files', root_dir_tm_data, _TM_BOARD_PREFIXES,
                        column_list, column_list_itog, dtype_columns={11: float})


def collect_tm_brd_wheel_data(root_dir_wheel_data, column_list, column_list_itog):
    """Collect the raw wheel files of boards 01-04 into one CSV per board.

    Only rows whose ``STATE`` value is the string ``'0'`` are written.

    Args:
        root_dir_wheel_data: directory searched recursively for ``mvn_wheel_brdNN_*`` files.
        column_list: columns read from the raw files (must include ``STATE``).
        column_list_itog: columns written to the resulting CSV.
    """
    _collect_board_data('collect_tm_brd_wheel_data', root_dir_wheel_data, _WHEEL_BOARD_PREFIXES,
                        column_list, column_list_itog, dtype_columns={0: float, 1: int},
                        row_filter=lambda data: data[data['STATE'] == '0'])


def split_wheel_intervals(wheel_df, border):
    """Split rows by how far their TIME step is from the median step.

    A ``TIME_diff`` column (difference to the previous row's TIME) is added to
    a copy of *wheel_df*.  Rows whose step lies strictly within *border* of
    the median go to the first frame, all other rows with a defined step go
    to the second; the first row (no previous row) is in neither.

    Returns:
        ``(clear, peaks)`` tuple of DataFrames.
    """
    with_diff = wheel_df.assign(TIME_diff=wheel_df['TIME'].diff())
    tdiff = with_diff['TIME_diff']
    median_tdiff = tdiff.median()
    lower = median_tdiff - border
    upper = median_tdiff + border
    clear = with_diff[(tdiff > lower) & (tdiff < upper)]
    peaks = with_diff[(tdiff <= lower) | (tdiff >= upper)]
    return clear, peaks


def _require_pyplot():
    """Return ``matplotlib.pyplot`` or fail with a clear message if it is missing."""
    if plt is None:
        raise ImportError('matplotlib is required for plotting')
    return plt


def main():
    """Rebuild the per-board CSV files and show the board 01 plots."""
    ## collect raw tm brd data into one file for each brd
    collect_tm_brd_files(root_dir_tm_data,
                         ['TIME', 'PER_1Hz', 'ST_HV'],
                         ['TIME', 'timestamp', 'PER_1Hz', 'ST_HV'])
    collect_tm_brd_wheel_data(root_dir_wheel_data,
                              ['TIME', 'STATE'],
                              ['TIME', 'timestamp', 'STATE'])

    pyplot = _require_pyplot()

    ## plot 'evolution' 1 Hz from tm brd data
    # The files were written as utf-8-sig, so read them back the same way.
    fname = path_itog_brd_data + 'mvn_tm_brd01.csv'
    df = pd.read_csv(fname, sep=';', encoding='utf-8-sig')
    df['timestamp'] = pd.to_datetime(df['timestamp'], format=tstamp_s)
    pyplot.plot(df['timestamp'], df['PER_1Hz'], '.')
    pyplot.show()

    ## plot wheel TIME steps that stay close to the median step
    border_clr_wheel = 2
    fname = path_itog_brd_data + 'mvn_wheel_brd01.csv'
    wheel_df = pd.read_csv(fname, sep=';', encoding='utf-8-sig')
    wheel_df_clear, _wheel_df_peaks = split_wheel_intervals(wheel_df, border_clr_wheel)
    pyplot.plot(wheel_df_clear['TIME'], wheel_df_clear['TIME_diff'])
    pyplot.show()


if __name__ == '__main__':
    main()