Skip to content

Commit c945502

Browse files
committed
stable version
1 parent 9bc9a66 commit c945502

File tree

3 files changed

+171
-211
lines changed

3 files changed

+171
-211
lines changed

ChairAnalyzer.py

Lines changed: 88 additions & 197 deletions
Original file line numberDiff line numberDiff line change
@@ -12,88 +12,30 @@
1212

1313
pic_prefix = '../../pic/'
1414

15-
1615
class ChairAnalyser:
1716

1817
def __init__(self,
19-
folder,
18+
df,
2019
measurement_interval,
2120
pic_prefix,
2221
measurements_per_batch=1000,
2322
name=None,
2423
):
25-
self.folder = folder
24+
self.df_total = df
2625
self.measurement_interval = measurement_interval
2726
self.pic_prefix = pic_prefix
2827
self.measurements_per_batch = measurements_per_batch
29-
if name is not None:
30-
self.name = name
31-
else:
32-
self.name = folder.split('/')[-1]
33-
34-
self.get_df_total()
35-
36-
def get_df_total(self):
37-
folder = self.folder
38-
39-
filenames_list = os.listdir(folder)
40-
filenames_list = sorted([int(x) for x in filenames_list])
41-
filenames_list = [str(x) for x in filenames_list]
42-
43-
df_total = None
44-
45-
for filename in filenames_list:
46-
print(filename)
47-
48-
# dicts_list = joblib.load(folder + '/' + filename)
49-
dicts_list = []
50-
with open(folder + '/' + filename) as f:
51-
lines = f.readlines()
52-
# print(len(lines))
53-
if len(lines) == 0:
54-
continue
55-
56-
for line in lines:
57-
try:
58-
new_dict = json.loads(line)
59-
new_dict['datetime_now'] = self.parse_string_iso_format(new_dict['datetime_now'])
60-
dicts_list.append(new_dict)
61-
except:
62-
break
63-
64-
df2append = pd.DataFrame(dicts_list)
65-
66-
if df_total is None:
67-
df_total = df2append
68-
else:
69-
df_total = pd.concat([df_total, df2append], axis=0)
70-
71-
rename_dict = {
72-
'accelerometer_x': 'Acc_x',
73-
'accelerometer_y': 'Acc_y',
74-
'accelerometer_z': 'Acc_z',
75-
'magnetometer_x': 'Mag_x',
76-
'magnetometer_y': 'Mag_y',
77-
'magnetometer_z': 'Mag_z',
78-
b'accelerometer_x': 'Acc_x',
79-
b'accelerometer_y': 'Acc_y',
80-
b'accelerometer_z': 'Acc_z',
81-
b'magnetometer_x': 'Mag_x',
82-
b'magnetometer_y': 'Mag_y',
83-
b'magnetometer_z': 'Mag_z',
84-
}
85-
if df_total is not None:
86-
df_total.rename(columns=rename_dict, inplace=True)
87-
df_total.reset_index(inplace=True, drop=True)
88-
89-
self.df_total = df_total
90-
91-
# df_total = get_df_total(folder='ivan_0')
28+
self.name = name
29+
30+
self.means, self.stds, medians = self.create_mean_stds()
31+
32+
9233
def plot_measurements_timeline(
9334
self,
94-
sensors=('accel', 'gyro', 'mag'),
35+
sensors=('acc', 'gyro', 'mag'),
9536
axes=('x', 'y', 'z'),
96-
# filename='measurements_timeline',
37+
plot_suptitle=True,
38+
fontsize=18,
9739
):
9840
df = self.df_total
9941
name = self.name
@@ -102,7 +44,8 @@ def plot_measurements_timeline(
10244
n_cols = len(sensors)
10345
n_rows = len(axes)
10446

105-
fig, ax = plt.subplots(n_rows, n_cols, sharex='col', figsize=(19, 11))
47+
fig, ax = plt.subplots(n_rows, n_cols, sharex='col', figsize=(14, 9.5))
48+
10649

10750
for n_row, n_col in itertools.product(range(n_rows), range(n_cols)):
10851
ax_instance = ax[n_row, n_col]
@@ -111,153 +54,98 @@ def plot_measurements_timeline(
11154
data2plot = df.loc[:, column_name].values
11255

11356
ax_instance.plot(data2plot)
57+
# plt.xticks(fontsize=fontsize - 2)
58+
# plt.yticks(fontsize=fontsize - 2)
11459

11560
if n_row == 0:
11661
title = sensors[n_col]
117-
ax_instance.set_title(title)
62+
if title == 'acc':
63+
title = 'Accelerometer'
64+
elif title == 'gyro':
65+
title = 'Gyroscope'
66+
ax_instance.set_title(title, fontsize=fontsize)
11867

11968
if n_col == 0:
12069
title = axes[n_row]
121-
ax_instance.set_ylabel(title)
70+
ax_instance.set_ylabel(title, fontsize=fontsize)
71+
72+
if plot_suptitle:
73+
suptitle = f'measurement_interval = {measurement_interval}'
12274

123-
zeros_portions = self.get_zeros_portion()
124-
mag_zeros_portion = zeros_portions[['mag_x', 'mag_y', 'mag_z']].mean()
125-
mag_zeros_string = f'Mag zeros portion = {round(mag_zeros_portion, 3)}'
75+
if 'mag' in sensors:
76+
zeros_portions = self.get_zeros_portion()
77+
mag_zeros_portion = zeros_portions[['mag_x', 'mag_y', 'mag_z']].mean()
78+
if plot_suptitle:
79+
mag_zeros_string = f'Mag zeros portion = {round(mag_zeros_portion, 3)}'
80+
suptitle = suptitle + ', ' + mag_zeros_string
81+
82+
if plot_suptitle:
83+
plt.suptitle(suptitle, fontsize=fontsize + 2)
12684

127-
suptitle = f'measurement_interval = {measurement_interval}, ' + mag_zeros_string
128-
plt.suptitle(suptitle)
129-
# plt.tight_layout(rect=[0, 0, 1, 0.5])
13085
fig.tight_layout(rect=[0, 0.00, 1, 0.97])
131-
# fig.subplots_adjust(top=0.85)
132-
# plt.savefig(pic_prefix + filename)
13386
plt.savefig(pic_prefix + f'measurements_timeline_{name}.png')
13487
plt.close()
13588

136-
# create means_stds ?
89+
def create_mean_stds(self, columns=('acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z')):
90+
df_chair = self.df_total.loc[:, columns]
91+
# df_chair = df_chair.loc[:, columns]
92+
# medians, lower_bounds, upper_bounds = np.percentile(df_chair, [50, percentile2crop, 100 - percentile2crop], axis=0)
13793

138-
# def get_lean_back_portion(acc_z, means_stds=means_stds, n_sigma=5):
139-
def get_lean_back_portion(self, acc_z, acc_z_mean=-15910, acc_z_std=30, n_sigma=3):
140-
# result = {}
141-
# acc_z_mean = means_stds.loc['Acc_z', 'mean']
142-
# acc_z_std = means_stds.loc['Acc_z', 'std']
94+
means = df_chair.mean(axis=0)
95+
medians = df_chair.median(axis=0)
96+
stds = df_chair.std(axis=0)
97+
98+
return means, stds, medians
99+
100+
def get_nonstationary_values_portion(self, n_sigma=3):
101+
means = self.means
102+
stds = self.stds
143103

144-
acc_z_min = acc_z_mean - n_sigma * acc_z_std
145-
acc_z_max = acc_z_mean + n_sigma * acc_z_std
104+
columns = stds.index
105+
df_chair = self.df_total.loc[:, columns]
146106

147-
lean_back_portion = (acc_z < acc_z_min).mean()
148-
# result['lean_back_portion'] = lean_back_portion
107+
lower_bounds = means - n_sigma * stds
108+
upper_bounds = means + n_sigma * stds
109+
110+
low_values_means = (df_chair.loc[:, columns] < lower_bounds).mean()
111+
high_values_means = (df_chair.loc[:, columns] > upper_bounds).mean()
112+
113+
nonstationary_values_portion = low_values_means + high_values_means
114+
nonstationary_values_portion.index = [colname + '__nonstationary_portion' for colname in nonstationary_values_portion.index]
115+
nonstationary_values_portion.name = self.name
116+
117+
return nonstationary_values_portion
118+
119+
# def get_lean_back_portion(acc_z, means_stds=means_stds, n_sigma=5):
120+
def get_lean_back_portion(self, acc_z_threshold=0.97):
121+
df_chair = self.df_total
122+
lean_back_portion = (df_chair[['acc_z']] < acc_z_threshold).mean()
123+
lean_back_portion.index = ['lean_back_portion']
124+
lean_back_portion.name = self.name
149125

150-
# return result
151126
return lean_back_portion
152127

153-
def get_mess_mask_acc(self, acc_data, percentile2crop=10, n_sigma=10):
154-
lower_bound, upper_bound, median = np.percentile(acc_data, [percentile2crop, 100 - percentile2crop, 50])
155-
acc_data_filtered = acc_data[(lower_bound < acc_data) & (acc_data < upper_bound)]
156-
std = np.std(acc_data_filtered)
157-
oscillation = std / (25 * n_sigma)
158-
159-
# Calculating bound for calm state
160-
calm_state_lower_bound = median - n_sigma * std
161-
calm_state_upper_bound = median + n_sigma * std
162-
163-
mask_calm = ((calm_state_lower_bound < acc_data) & (acc_data < calm_state_upper_bound)).values
164-
# mess_portion = 1 - np.mean(mask_calm)
165-
166-
# return mess_portion
167-
return mask_calm, oscillation
168-
169-
def get_mess_mask_mag(self, mag_data, w=0.05, max_calm_derivative=30):
170-
# Spline approximation
171-
y = mag_data.values
172-
x = np.arange(len(y))
173-
splines = splrep(x, y, w=w * np.ones_like(y))
174-
points = splev(x, splines, der=0)
175-
derivatives = splev(x, splines, der=1)
176-
177-
mask_calm = abs(derivatives) < max_calm_derivative
178-
179-
# return points, derivatives
180-
return mask_calm
181-
182-
def get_mess_mask_mag4graph(self, mag_data, w=0.05, max_calm_derivative=30):
183-
# Spline approximation
184-
y = mag_data.values
185-
x = np.arange(len(y))
186-
splines = splrep(x, y, w=w * np.ones_like(y))
187-
points = splev(x, splines, der=0)
188-
derivatives = splev(x, splines, der=1)
189-
190-
mask_calm = abs(derivatives) < max_calm_derivative
191-
192-
return points, derivatives
193-
194-
def get_chair_stats(self):
195-
df_total = self.df_total
196-
# results_list = []
197-
198-
mask_calm_acc_x, oscillation_acc_x = self.get_mess_mask_acc(df_total['Acc_x'])
199-
mask_calm_acc_y, oscillation_acc_y = self.get_mess_mask_acc(df_total['Acc_y'])
200-
mask_calm_acc_z, oscillation_acc_z = self.get_mess_mask_acc(df_total['Acc_z'])
201-
202-
mess_portion_acc_x = 1 - mask_calm_acc_x.mean()
203-
mess_portion_acc_y = 1 - mask_calm_acc_y.mean()
204-
mess_portion_acc_z = 1 - mask_calm_acc_z.mean()
205-
206-
mess_portion_acc = (oscillation_acc_x + oscillation_acc_y + oscillation_acc_z) / 3
207-
208-
mask_calm_acc = mask_calm_acc_x & mask_calm_acc_y & mask_calm_acc_z
209-
mess_portion_acc = 1 - mask_calm_acc.mean()
210-
211-
mask_calm_mag_x = self.get_mess_mask_mag(df_total['Mag_x'])
212-
mask_calm_mag_y = self.get_mess_mask_mag(df_total['Mag_y'])
213-
mask_calm_mag_z = self.get_mess_mask_mag(df_total['Mag_z'])
214-
215-
mess_portion_mag_x = 1 - mask_calm_mag_x.mean()
216-
mess_portion_mag_y = 1 - mask_calm_mag_y.mean()
217-
mess_portion_mag_z = 1 - mask_calm_mag_z.mean()
218-
219-
mask_calm_mag = mask_calm_mag_x & mask_calm_mag_y & mask_calm_mag_z
220-
mess_portion_mag = 1 - mask_calm_mag.mean()
221-
222-
lean_back_portion = self.get_lean_back_portion(df_total['Acc_z'])
223-
result = {
224-
# 'people_id': people_id,
225-
'mess_portion_acc_x': mess_portion_acc_x,
226-
'mess_portion_acc_y': mess_portion_acc_y,
227-
'mess_portion_acc_z': mess_portion_acc_z,
228-
'mess_portion_acc': mess_portion_acc,
229-
'lean_back_portion': lean_back_portion,
230-
'mess_portion_mag_x': mess_portion_mag_x,
231-
'mess_portion_mag_y': mess_portion_mag_y,
232-
'mess_portion_mag_z': mess_portion_mag_z,
233-
'mess_portion_mag': mess_portion_mag,
234-
'oscillation_acc_x': oscillation_acc_x,
235-
'oscillation_acc_y': oscillation_acc_y,
236-
'oscillation_acc_z': oscillation_acc_z,
237-
'oscillation_acc': oscillation_acc_z,
238-
# 'stress': stress,
239-
}
240-
# results_list.append(result)
241-
242-
# return results_list
243-
return result
128+
def get_oscillation_intensity(self, percentile2crop=10, columns=('acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z')):
129+
df_chair = self.df_total.loc[:, columns]
130+
result = {}
244131

245-
def get_chair_stats_truncated(self):
246-
# self.get_df_total()
247-
self.plot_measurements_timeline()
248-
chair_stats_detailed = self.get_chair_stats()
132+
for column in columns:
133+
lower_bounds, upper_bounds = np.percentile(df_chair.loc[:, column], [percentile2crop, 100 - percentile2crop], axis=0)
134+
# intervals = upper_bounds - lower_bounds
135+
low_values_mask = (df_chair.loc[:, column] < lower_bounds)
136+
high_values_mask = (df_chair.loc[:, column] > upper_bounds)
249137

250-
rename_dict = {
251-
'mess_portion_acc': 'Momentum',
252-
'mess_portion_mag': 'Rotational movement',
253-
'lean_back_portion': 'Lean back',
254-
'oscillation_acc': 'Oscillation',
255-
}
138+
normal_values_mask = (~low_values_mask) & (~high_values_mask)
256139

257-
chair_stats_detailed_truncated = {rename_dict[key]: chair_stats_detailed[key] for key in rename_dict if
258-
key in rename_dict}
140+
usual_sitting_stds = df_chair.loc[normal_values_mask, column].std()
141+
oscillations = usual_sitting_stds# / intervals
142+
feature_name = f'{column}__oscillations'
143+
result[feature_name] = oscillations
259144

260-
return chair_stats_detailed_truncated
145+
result = pd.Series(result)
146+
result.name = self.name
147+
148+
return result
261149

262150
def plot_measurement_times(self): # , filename='time_wrt_step.png'):
263151
df = self.df_total
@@ -268,8 +156,8 @@ def plot_measurement_times(self): # , filename='time_wrt_step.png'):
268156
n_batches = n_measurements // self.measurements_per_batch
269157
name = self.name
270158

271-
timestamp_start = df['datetime_now'].min().timestamp()
272-
time_passed = df['datetime_now'].apply(lambda x: x.timestamp() - timestamp_start)
159+
timestamp_start = df['time'].min().timestamp()
160+
time_passed = df['time'].apply(lambda x: x.timestamp() - timestamp_start)
273161

274162
# index2drop = range(measurements_per_batch, n_measurements, measurements_per_batch)
275163
# time_passed_truncated = time_passed.drop(index2drop, axis=0)
@@ -297,7 +185,7 @@ def plot_measurement_times(self): # , filename='time_wrt_step.png'):
297185
plt.savefig(pic_prefix + f'time_wrt_step_{name}.png')
298186

299187
def get_zeros_portion(self):
300-
df = self.df_total.drop('datetime_now', axis=1)
188+
df = self.df_total.drop('time', axis=1)
301189
zeros_portions = (df == 0).mean(axis=0)
302190

303191
return zeros_portions
@@ -306,3 +194,6 @@ def get_zeros_portion(self):
306194
def parse_string_iso_format(s):
307195
d = dateutil.parser.parse(s)
308196
return d
197+
198+
199+

0 commit comments

Comments
 (0)