import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
import itertools
from scipy.interpolate import splev, splrep
import json
from datetime import datetime
import dateutil.parser

pic_prefix = '../../pic/'


class ChairAnalyser:
    """Loads chair sensor logs from a folder and computes movement statistics and plots."""

    def __init__(self,
                 folder,
                 measurement_interval,
                 pic_prefix,
                 measurements_per_batch=1000,
                 name=None,
                 ):
        self.folder = folder
        self.measurement_interval = measurement_interval
        self.pic_prefix = pic_prefix
        self.measurements_per_batch = measurements_per_batch
        if name is not None:
            self.name = name
        else:
            # Fall back to the folder name when no explicit name is given
            self.name = folder.split('/')[-1]

        self.get_df_total()

    def get_df_total(self):
        """Reads every batch file in the folder (one JSON record per line) into self.df_total."""
        folder = self.folder

        # Filenames are integers: sort numerically, not lexicographically
        filenames_list = os.listdir(folder)
        filenames_list = sorted([int(x) for x in filenames_list])
        filenames_list = [str(x) for x in filenames_list]

        df_total = None

        for filename in filenames_list:
            print(filename)

            # dicts_list = joblib.load(folder + '/' + filename)
            dicts_list = []
            with open(os.path.join(folder, filename)) as f:
                lines = f.readlines()
                if len(lines) == 0:
                    continue

                for line in lines:
                    try:
                        new_dict = json.loads(line)
                        new_dict['datetime_now'] = self.parse_string_iso_format(new_dict['datetime_now'])
                        dicts_list.append(new_dict)
                    except (ValueError, KeyError):
                        # Stop at the first malformed or truncated line of the batch
                        break

            df2append = pd.DataFrame(dicts_list)

            if df_total is None:
                df_total = df2append
            else:
                df_total = pd.concat([df_total, df2append], axis=0)

        rename_dict = {
            'accelerometer_x': 'Acc_x',
            'accelerometer_y': 'Acc_y',
            'accelerometer_z': 'Acc_z',
            'magnetometer_x': 'Mag_x',
            'magnetometer_y': 'Mag_y',
            'magnetometer_z': 'Mag_z',
            b'accelerometer_x': 'Acc_x',
            b'accelerometer_y': 'Acc_y',
            b'accelerometer_z': 'Acc_z',
            b'magnetometer_x': 'Mag_x',
            b'magnetometer_y': 'Mag_y',
            b'magnetometer_z': 'Mag_z',
        }
        if df_total is not None:
            df_total.rename(columns=rename_dict, inplace=True)
            df_total.reset_index(inplace=True, drop=True)

        self.df_total = df_total

    # df_total = get_df_total(folder='ivan_0')
    def plot_measurements_timeline(
            self,
            sensors=('Acc', 'Mag'),  # column prefixes after renaming; there is no gyroscope column
            axes=('x', 'y', 'z'),
    ):
        df = self.df_total
        name = self.name
        pic_prefix = self.pic_prefix
        measurement_interval = self.measurement_interval

        n_cols = len(sensors)
        n_rows = len(axes)

        fig, ax = plt.subplots(n_rows, n_cols, sharex='col', figsize=(19, 11))

        for n_row, n_col in itertools.product(range(n_rows), range(n_cols)):
            ax_instance = ax[n_row, n_col]

            column_name = sensors[n_col] + '_' + axes[n_row]
            data2plot = df.loc[:, column_name].values

            ax_instance.plot(data2plot)

            if n_row == 0:
                title = sensors[n_col]
                ax_instance.set_title(title)

            if n_col == 0:
                title = axes[n_row]
                ax_instance.set_ylabel(title)

        zeros_portions = self.get_zeros_portion()
        mag_zeros_portion = zeros_portions[['Mag_x', 'Mag_y', 'Mag_z']].mean()
        mag_zeros_string = f'Mag zeros portion = {round(mag_zeros_portion, 3)}'

        suptitle = f'measurement_interval = {measurement_interval}, ' + mag_zeros_string
        plt.suptitle(suptitle)
        fig.tight_layout(rect=[0, 0.00, 1, 0.97])
        plt.savefig(pic_prefix + f'measurements_timeline_{name}.png')
        plt.close()

    # create means_stds ?

    # def get_lean_back_portion(acc_z, means_stds=means_stds, n_sigma=5):
    def get_lean_back_portion(self, acc_z, acc_z_mean=-15910, acc_z_std=30, n_sigma=3):
        """Portion of samples where Acc_z lies more than n_sigma stds below its calm-sitting mean,
        i.e. the person is leaning back. The default mean/std are hard-coded calibration values."""
        acc_z_min = acc_z_mean - n_sigma * acc_z_std

        lean_back_portion = (acc_z < acc_z_min).mean()

        return lean_back_portion

    def get_mess_mask_acc(self, acc_data, percentile2crop=10, n_sigma=10):
        """Returns a boolean mask of "calm" accelerometer samples and an oscillation score
        derived from the std of the data with the tails cropped."""
        lower_bound, upper_bound, median = np.percentile(acc_data, [percentile2crop, 100 - percentile2crop, 50])
        acc_data_filtered = acc_data[(lower_bound < acc_data) & (acc_data < upper_bound)]
        std = np.std(acc_data_filtered)
        oscillation = std / (25 * n_sigma)

        # Calculating bounds for calm state
        calm_state_lower_bound = median - n_sigma * std
        calm_state_upper_bound = median + n_sigma * std

        mask_calm = ((calm_state_lower_bound < acc_data) & (acc_data < calm_state_upper_bound)).values

        return mask_calm, oscillation

    def get_mess_mask_mag(self, mag_data, w=0.05, max_calm_derivative=30):
        # Spline approximation
        y = mag_data.values
        x = np.arange(len(y))
        splines = splrep(x, y, w=w * np.ones_like(y))
        points = splev(x, splines, der=0)
        derivatives = splev(x, splines, der=1)

        mask_calm = abs(derivatives) < max_calm_derivative

        # return points, derivatives
        return mask_calm

    def get_mess_mask_mag4graph(self, mag_data, w=0.05, max_calm_derivative=30):
        # Spline approximation
        y = mag_data.values
        x = np.arange(len(y))
        splines = splrep(x, y, w=w * np.ones_like(y))
        points = splev(x, splines, der=0)
        derivatives = splev(x, splines, der=1)

        mask_calm = abs(derivatives) < max_calm_derivative

        return points, derivatives

    def get_chair_stats(self):
        df_total = self.df_total

        mask_calm_acc_x, oscillation_acc_x = self.get_mess_mask_acc(df_total['Acc_x'])
        mask_calm_acc_y, oscillation_acc_y = self.get_mess_mask_acc(df_total['Acc_y'])
        mask_calm_acc_z, oscillation_acc_z = self.get_mess_mask_acc(df_total['Acc_z'])

        mess_portion_acc_x = 1 - mask_calm_acc_x.mean()
        mess_portion_acc_y = 1 - mask_calm_acc_y.mean()
        mess_portion_acc_z = 1 - mask_calm_acc_z.mean()

        # Average oscillation over the three accelerometer axes
        oscillation_acc = (oscillation_acc_x + oscillation_acc_y + oscillation_acc_z) / 3

        mask_calm_acc = mask_calm_acc_x & mask_calm_acc_y & mask_calm_acc_z
        mess_portion_acc = 1 - mask_calm_acc.mean()

        mask_calm_mag_x = self.get_mess_mask_mag(df_total['Mag_x'])
        mask_calm_mag_y = self.get_mess_mask_mag(df_total['Mag_y'])
        mask_calm_mag_z = self.get_mess_mask_mag(df_total['Mag_z'])

        mess_portion_mag_x = 1 - mask_calm_mag_x.mean()
        mess_portion_mag_y = 1 - mask_calm_mag_y.mean()
        mess_portion_mag_z = 1 - mask_calm_mag_z.mean()

        mask_calm_mag = mask_calm_mag_x & mask_calm_mag_y & mask_calm_mag_z
        mess_portion_mag = 1 - mask_calm_mag.mean()

        lean_back_portion = self.get_lean_back_portion(df_total['Acc_z'])

        result = {
            'mess_portion_acc_x': mess_portion_acc_x,
            'mess_portion_acc_y': mess_portion_acc_y,
            'mess_portion_acc_z': mess_portion_acc_z,
            'mess_portion_acc': mess_portion_acc,
            'lean_back_portion': lean_back_portion,
            'mess_portion_mag_x': mess_portion_mag_x,
            'mess_portion_mag_y': mess_portion_mag_y,
            'mess_portion_mag_z': mess_portion_mag_z,
            'mess_portion_mag': mess_portion_mag,
            'oscillation_acc_x': oscillation_acc_x,
            'oscillation_acc_y': oscillation_acc_y,
            'oscillation_acc_z': oscillation_acc_z,
            'oscillation_acc': oscillation_acc,
        }

        return result

    def get_chair_stats_truncated(self):
        self.plot_measurements_timeline()
        chair_stats_detailed = self.get_chair_stats()

        # Keep only the headline metrics, under human-readable names
        rename_dict = {
            'mess_portion_acc': 'Momentum',
            'mess_portion_mag': 'Rotational movement',
            'lean_back_portion': 'Lean back',
            'oscillation_acc': 'Oscillation',
        }

        chair_stats_detailed_truncated = {rename_dict[key]: chair_stats_detailed[key]
                                          for key in rename_dict if key in chair_stats_detailed}

        return chair_stats_detailed_truncated

    def plot_measurement_times(self):
        df = self.df_total
        pic_prefix = self.pic_prefix
        measurement_interval = self.measurement_interval
        measurements_per_batch = self.measurements_per_batch
        n_measurements = len(df)
        n_batches = n_measurements // measurements_per_batch
        name = self.name

        timestamp_start = df['datetime_now'].min().timestamp()
        time_passed = df['datetime_now'].apply(lambda x: x.timestamp() - timestamp_start)

        # Gap between the last measurement of each batch and the first measurement of the next
        time_between_batches_array = time_passed[measurements_per_batch::measurements_per_batch].values - \
                                     time_passed[measurements_per_batch - 1:-1:measurements_per_batch].values
        time_between_batches = time_between_batches_array.mean()

        timediff_total = time_passed.iloc[-1]
        timediff_because_of_measurements = timediff_total - time_between_batches_array.sum()
        n_measurements_without_batch = n_measurements - n_batches
        time_between_measurements = timediff_because_of_measurements / n_measurements_without_batch

        plt.close()
        plt.figure(figsize=(16, 12))
        plt.plot(time_passed)
        plt.xlabel('n_step')
        plt.ylabel('Time passed, s')
        title = f'Measurement interval = {round(measurement_interval, 3)}, ' + \
                f'Time Between Measurements = {round(time_between_measurements, 3)}, ' + \
                f'Time Between Batches = {round(time_between_batches, 3)}'
        plt.title(title, fontsize=16)
        plt.tight_layout()
        plt.savefig(pic_prefix + f'time_wrt_step_{name}.png')
        plt.close()

    def get_zeros_portion(self):
        """Per-column fraction of readings that are exactly zero."""
        df = self.df_total.drop('datetime_now', axis=1)
        zeros_portions = (df == 0).mean(axis=0)

        return zeros_portions

    @staticmethod
    def parse_string_iso_format(s):
        d = dateutil.parser.parse(s)
        return d
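
# A minimal usage sketch (not part of the original module): the folder path, the
# sampling interval, and the output prefix below are hypothetical values chosen
# for illustration; adjust them to the actual recording setup.
if __name__ == '__main__':
    analyser = ChairAnalyser(
        folder='../../data/ivan_0',   # hypothetical path to one recording session
        measurement_interval=0.01,    # hypothetical sampling interval, in seconds
        pic_prefix=pic_prefix,
    )
    analyser.plot_measurement_times()
    print(analyser.get_chair_stats_truncated())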