12
12
13
13
# Module-level path prefix that the plotting code prepends to the file names
# it passes to plt.savefig().
pic_prefix = '../../pic/'
14
14
15
-
16
15
class ChairAnalyser :
17
16
18
17
def __init__(self,
             df,
             measurement_interval,
             pic_prefix,
             measurements_per_batch=1000,
             name=None,
             ):
    """Store the measurements table and settings, then cache column statistics.

    Args:
        df: Table of measurements; kept as ``self.df_total``.
        measurement_interval: Stored unchanged as ``self.measurement_interval``.
        pic_prefix: Stored unchanged as ``self.pic_prefix``.
        measurements_per_batch: Stored unchanged as
            ``self.measurements_per_batch``.
        name: Label stored as ``self.name``; other methods use it to name
            their results and output files.
    """
    self.df_total = df
    self.measurement_interval = measurement_interval
    self.pic_prefix = pic_prefix
    self.measurements_per_batch = measurements_per_batch
    self.name = name

    # create_mean_stds() returns (means, stds, medians). Keep all three on
    # the instance instead of discarding the medians.
    self.means, self.stds, self.medians = self.create_mean_stds()
31
+
32
+
92
33
def plot_measurements_timeline (
93
34
self ,
94
- sensors = ('accel ' , 'gyro' , 'mag' ),
35
+ sensors = ('acc ' , 'gyro' , 'mag' ),
95
36
axes = ('x' , 'y' , 'z' ),
96
- # filename='measurements_timeline',
37
+ plot_suptitle = True ,
38
+ fontsize = 18 ,
97
39
):
98
40
df = self .df_total
99
41
name = self .name
@@ -102,7 +44,8 @@ def plot_measurements_timeline(
102
44
n_cols = len (sensors )
103
45
n_rows = len (axes )
104
46
105
- fig , ax = plt .subplots (n_rows , n_cols , sharex = 'col' , figsize = (19 , 11 ))
47
+ fig , ax = plt .subplots (n_rows , n_cols , sharex = 'col' , figsize = (14 , 9.5 ))
48
+
106
49
107
50
for n_row , n_col in itertools .product (range (n_rows ), range (n_cols )):
108
51
ax_instance = ax [n_row , n_col ]
@@ -111,153 +54,98 @@ def plot_measurements_timeline(
111
54
data2plot = df .loc [:, column_name ].values
112
55
113
56
ax_instance .plot (data2plot )
57
+ # plt.xticks(fontsize=fontsize - 2)
58
+ # plt.yticks(fontsize=fontsize - 2)
114
59
115
60
if n_row == 0 :
116
61
title = sensors [n_col ]
117
- ax_instance .set_title (title )
62
+ if title == 'acc' :
63
+ title = 'Accelerometer'
64
+ elif title == 'gyro' :
65
+ title = 'Gyroscope'
66
+ ax_instance .set_title (title , fontsize = fontsize )
118
67
119
68
if n_col == 0 :
120
69
title = axes [n_row ]
121
- ax_instance .set_ylabel (title )
70
+ ax_instance .set_ylabel (title , fontsize = fontsize )
71
+
72
+ if plot_suptitle :
73
+ suptitle = f'measurement_interval = { measurement_interval } '
122
74
123
- zeros_portions = self .get_zeros_portion ()
124
- mag_zeros_portion = zeros_portions [['mag_x' , 'mag_y' , 'mag_z' ]].mean ()
125
- mag_zeros_string = f'Mag zeros portion = { round (mag_zeros_portion , 3 )} '
75
+ if 'mag' in sensors :
76
+ zeros_portions = self .get_zeros_portion ()
77
+ mag_zeros_portion = zeros_portions [['mag_x' , 'mag_y' , 'mag_z' ]].mean ()
78
+ if plot_suptitle :
79
+ mag_zeros_string = f'Mag zeros portion = { round (mag_zeros_portion , 3 )} '
80
+ suptitle = suptitle + ', ' + mag_zeros_string
81
+
82
+ if plot_suptitle :
83
+ plt .suptitle (suptitle , fontsize = fontsize + 2 )
126
84
127
- suptitle = f'measurement_interval = { measurement_interval } , ' + mag_zeros_string
128
- plt .suptitle (suptitle )
129
- # plt.tight_layout(rect=[0, 0, 1, 0.5])
130
85
fig .tight_layout (rect = [0 , 0.00 , 1 , 0.97 ])
131
- # fig.subplots_adjust(top=0.85)
132
- # plt.savefig(pic_prefix + filename)
133
86
plt .savefig (pic_prefix + f'measurements_timeline_{ name } .png' )
134
87
plt .close ()
135
88
136
- # create means_stds ?
89
def create_mean_stds(self, columns=('acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z')):
    """Compute per-column mean, standard deviation and median of ``self.df_total``.

    Args:
        columns: Labels of the columns of ``self.df_total`` to summarise.

    Returns:
        A tuple ``(means, stds, medians)`` -- note the order -- of pandas
        Series indexed by column label.
    """
    # Materialise the labels as a list so any iterable of labels is a valid
    # multi-column selector for .loc.
    df_chair = self.df_total.loc[:, list(columns)]

    means = df_chair.mean(axis=0)
    medians = df_chair.median(axis=0)
    stds = df_chair.std(axis=0)

    return means, stds, medians
99
+
100
def get_nonstationary_values_portion(self, n_sigma=3):
    """Return, per column, the fraction of rows outside mean +/- n_sigma * std.

    Uses the cached ``self.means`` and ``self.stds``; the columns examined
    are the labels in ``self.stds.index``.

    Args:
        n_sigma: Half-width of the accepted band, in standard deviations.

    Returns:
        A pandas Series named ``self.name`` whose labels are
        ``'<column>__nonstationary_portion'``.
    """
    means = self.means
    stds = self.stds

    columns = stds.index
    df_chair = self.df_total.loc[:, columns]

    lower_bounds = means - n_sigma * stds
    upper_bounds = means + n_sigma * stds

    # Comparing a DataFrame with a Series aligns the bounds on column labels,
    # so each column is tested against its own band.
    low_values_means = (df_chair < lower_bounds).mean()
    high_values_means = (df_chair > upper_bounds).mean()

    nonstationary_values_portion = low_values_means + high_values_means
    nonstationary_values_portion.index = [
        colname + '__nonstationary_portion'
        for colname in nonstationary_values_portion.index
    ]
    nonstationary_values_portion.name = self.name

    return nonstationary_values_portion
118
+
119
+ # def get_lean_back_portion(acc_z, means_stds=means_stds, n_sigma=5):
120
def get_lean_back_portion(self, acc_z_threshold=0.97):
    """Return the fraction of rows whose ``acc_z`` is below ``acc_z_threshold``.

    Args:
        acc_z_threshold: Rows with ``acc_z`` strictly below this value are
            counted.

    Returns:
        A one-element pandas Series named ``self.name`` with the single
        label ``'lean_back_portion'``.
    """
    df_chair = self.df_total
    # Selecting with a list keeps a one-column DataFrame, so .mean() yields a
    # Series (not a scalar) that can be relabelled below.
    lean_back_portion = (df_chair[['acc_z']] < acc_z_threshold).mean()
    lean_back_portion.index = ['lean_back_portion']
    lean_back_portion.name = self.name

    return lean_back_portion
152
127
153
- def get_mess_mask_acc (self , acc_data , percentile2crop = 10 , n_sigma = 10 ):
154
- lower_bound , upper_bound , median = np .percentile (acc_data , [percentile2crop , 100 - percentile2crop , 50 ])
155
- acc_data_filtered = acc_data [(lower_bound < acc_data ) & (acc_data < upper_bound )]
156
- std = np .std (acc_data_filtered )
157
- oscillation = std / (25 * n_sigma )
158
-
159
- # Calculating bound for calm state
160
- calm_state_lower_bound = median - n_sigma * std
161
- calm_state_upper_bound = median + n_sigma * std
162
-
163
- mask_calm = ((calm_state_lower_bound < acc_data ) & (acc_data < calm_state_upper_bound )).values
164
- # mess_portion = 1 - np.mean(mask_calm)
165
-
166
- # return mess_portion
167
- return mask_calm , oscillation
168
-
169
- def get_mess_mask_mag (self , mag_data , w = 0.05 , max_calm_derivative = 30 ):
170
- # Spline approximation
171
- y = mag_data .values
172
- x = np .arange (len (y ))
173
- splines = splrep (x , y , w = w * np .ones_like (y ))
174
- points = splev (x , splines , der = 0 )
175
- derivatives = splev (x , splines , der = 1 )
176
-
177
- mask_calm = abs (derivatives ) < max_calm_derivative
178
-
179
- # return points, derivatives
180
- return mask_calm
181
-
182
- def get_mess_mask_mag4graph (self , mag_data , w = 0.05 , max_calm_derivative = 30 ):
183
- # Spline approximation
184
- y = mag_data .values
185
- x = np .arange (len (y ))
186
- splines = splrep (x , y , w = w * np .ones_like (y ))
187
- points = splev (x , splines , der = 0 )
188
- derivatives = splev (x , splines , der = 1 )
189
-
190
- mask_calm = abs (derivatives ) < max_calm_derivative
191
-
192
- return points , derivatives
193
-
194
- def get_chair_stats (self ):
195
- df_total = self .df_total
196
- # results_list = []
197
-
198
- mask_calm_acc_x , oscillation_acc_x = self .get_mess_mask_acc (df_total ['Acc_x' ])
199
- mask_calm_acc_y , oscillation_acc_y = self .get_mess_mask_acc (df_total ['Acc_y' ])
200
- mask_calm_acc_z , oscillation_acc_z = self .get_mess_mask_acc (df_total ['Acc_z' ])
201
-
202
- mess_portion_acc_x = 1 - mask_calm_acc_x .mean ()
203
- mess_portion_acc_y = 1 - mask_calm_acc_y .mean ()
204
- mess_portion_acc_z = 1 - mask_calm_acc_z .mean ()
205
-
206
- mess_portion_acc = (oscillation_acc_x + oscillation_acc_y + oscillation_acc_z ) / 3
207
-
208
- mask_calm_acc = mask_calm_acc_x & mask_calm_acc_y & mask_calm_acc_z
209
- mess_portion_acc = 1 - mask_calm_acc .mean ()
210
-
211
- mask_calm_mag_x = self .get_mess_mask_mag (df_total ['Mag_x' ])
212
- mask_calm_mag_y = self .get_mess_mask_mag (df_total ['Mag_y' ])
213
- mask_calm_mag_z = self .get_mess_mask_mag (df_total ['Mag_z' ])
214
-
215
- mess_portion_mag_x = 1 - mask_calm_mag_x .mean ()
216
- mess_portion_mag_y = 1 - mask_calm_mag_y .mean ()
217
- mess_portion_mag_z = 1 - mask_calm_mag_z .mean ()
218
-
219
- mask_calm_mag = mask_calm_mag_x & mask_calm_mag_y & mask_calm_mag_z
220
- mess_portion_mag = 1 - mask_calm_mag .mean ()
221
-
222
- lean_back_portion = self .get_lean_back_portion (df_total ['Acc_z' ])
223
- result = {
224
- # 'people_id': people_id,
225
- 'mess_portion_acc_x' : mess_portion_acc_x ,
226
- 'mess_portion_acc_y' : mess_portion_acc_y ,
227
- 'mess_portion_acc_z' : mess_portion_acc_z ,
228
- 'mess_portion_acc' : mess_portion_acc ,
229
- 'lean_back_portion' : lean_back_portion ,
230
- 'mess_portion_mag_x' : mess_portion_mag_x ,
231
- 'mess_portion_mag_y' : mess_portion_mag_y ,
232
- 'mess_portion_mag_z' : mess_portion_mag_z ,
233
- 'mess_portion_mag' : mess_portion_mag ,
234
- 'oscillation_acc_x' : oscillation_acc_x ,
235
- 'oscillation_acc_y' : oscillation_acc_y ,
236
- 'oscillation_acc_z' : oscillation_acc_z ,
237
- 'oscillation_acc' : oscillation_acc_z ,
238
- # 'stress': stress,
239
- }
240
- # results_list.append(result)
241
-
242
- # return results_list
243
- return result
128
def get_oscillation_intensity(self, percentile2crop=10, columns=('acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z')):
    """Return, per column, the standard deviation of its central values.

    For each column the ``percentile2crop``-th and
    ``100 - percentile2crop``-th percentiles are computed; values strictly
    below the lower or strictly above the upper percentile are discarded
    and the standard deviation of the remainder is reported.

    Args:
        percentile2crop: Percentile cut applied at each tail.
        columns: Labels of the columns of ``self.df_total`` to process.

    Returns:
        A pandas Series named ``self.name`` whose labels are
        ``'<column>__oscillations'``.
    """
    df_chair = self.df_total.loc[:, list(columns)]
    result = {}

    for column in columns:
        values = df_chair.loc[:, column]
        lower_bound, upper_bound = np.percentile(
            values, [percentile2crop, 100 - percentile2crop])

        # Written as a negation so values equal to a bound are kept, matching
        # the strict comparisons used for the two tails.
        normal_values_mask = ~((values < lower_bound) | (values > upper_bound))

        result[f'{column}__oscillations'] = values[normal_values_mask].std()

    result = pd.Series(result)
    result.name = self.name

    return result
261
149
262
150
def plot_measurement_times (self ): # , filename='time_wrt_step.png'):
263
151
df = self .df_total
@@ -268,8 +156,8 @@ def plot_measurement_times(self): # , filename='time_wrt_step.png'):
268
156
n_batches = n_measurements // self .measurements_per_batch
269
157
name = self .name
270
158
271
- timestamp_start = df ['datetime_now ' ].min ().timestamp ()
272
- time_passed = df ['datetime_now ' ].apply (lambda x : x .timestamp () - timestamp_start )
159
+ timestamp_start = df ['time ' ].min ().timestamp ()
160
+ time_passed = df ['time ' ].apply (lambda x : x .timestamp () - timestamp_start )
273
161
274
162
# index2drop = range(measurements_per_batch, n_measurements, measurements_per_batch)
275
163
# time_passed_truncated = time_passed.drop(index2drop, axis=0)
@@ -297,7 +185,7 @@ def plot_measurement_times(self): # , filename='time_wrt_step.png'):
297
185
plt .savefig (pic_prefix + f'time_wrt_step_{ name } .png' )
298
186
299
187
def get_zeros_portion(self):
    """Return, per column, the fraction of rows of ``self.df_total`` equal to 0.

    The ``'time'`` column is excluded from the result. A table without that
    column is accepted as-is rather than raising ``KeyError``.

    Returns:
        A pandas Series indexed by the remaining column labels.
    """
    # errors='ignore' makes the drop a no-op when there is no 'time' column.
    df = self.df_total.drop(columns='time', errors='ignore')
    zeros_portions = (df == 0).mean(axis=0)

    return zeros_portions
@@ -306,3 +194,6 @@ def get_zeros_portion(self):
306
194
def parse_string_iso_format(s):
    """Parse the date/time string ``s`` with ``dateutil.parser.parse``.

    Args:
        s: Text to parse.

    Returns:
        Whatever ``dateutil.parser.parse`` returns for ``s``.
    """
    return dateutil.parser.parse(s)
197
+
198
+
199
+
0 commit comments