Skip to content

Commit 2f057b3

Browse files
committed
Major update. Splitting code into several separate files
1 parent c945502 commit 2f057b3

File tree

6 files changed

+308
-103
lines changed

6 files changed

+308
-103
lines changed

ChairProcessing.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import numpy as np
2+
import pandas as pd
3+
import matplotlib.pyplot as plt
4+
import os
5+
import joblib
6+
from utils import normalize_MPU9250_data, split_df
7+
from ChairAnalyzer import ChairAnalyser
8+
9+
plt.interactive(True)
10+
pd.options.display.max_columns = 15
11+
pic_prefix = 'pic/'
12+
# data_path = 'data/CSV'
13+
# data_path = 'Anonimised Data/Data'
14+
15+
data_dict = joblib.load('data/data_dict')
16+
17+
chair_features_list = []
18+
19+
def get_chair_features(df_chair, player_id):
    """Collect the chair-behaviour features for one chair log (or chunk of it).

    A ``ChairAnalyser`` is built over ``df_chair`` and its three feature
    getters are queried; their results are concatenated into one object.

    Args:
        df_chair: chair log data, handed to ``ChairAnalyser`` unchanged.
        player_id: forwarded to the analyser as its ``name``.

    Returns:
        ``pd.concat`` of the non-stationary portion, the lean-back portion and
        the oscillation intensity, in that order.
    """
    # NOTE(review): 0.01 is presumably the sampling interval in seconds --
    # confirm against ChairAnalyser's signature.
    analyser = ChairAnalyser(df_chair, 0.01, pic_prefix, name=player_id)
    feature_parts = [
        analyser.get_nonstationary_values_portion(),
        analyser.get_lean_back_portion(),
        analyser.get_oscillation_intensity(),
    ]
    # NOTE(review): the caller turns the index of the assembled frame into the
    # 'player_id' column, so it presumably relies on these objects being named
    # after the player -- verify in ChairAnalyser.
    return pd.concat(feature_parts)
28+
29+
30+
# Build one feature row per chunk of every player's chair log.
for player_id, player_data_dict in data_dict.items():
    if 'schairlog' not in player_data_dict:
        continue  # no chair recording for this player

    df_chair = player_data_dict['schairlog']

    # Each chair log is split into small sessions.
    # NOTE(review): 100 * 600 is presumably 100 samples/s * 600 s -- confirm.
    # `chunk_lenght` keeps the spelling of the `split_df` keyword it is passed as.
    chunk_lenght = 100 * 600
    df_chunks_list = split_df(df_chair, n_chunks=3, chunk_lenght=chunk_lenght)

    chair_features_list.extend(
        get_chair_features(df_chunk, player_id) for df_chunk in df_chunks_list
    )

# One row per chunk; the frame's index is moved into a 'player_id' column.
df_chair_features = pd.DataFrame(chair_features_list).reset_index()
df_chair_features = df_chair_features.rename(columns={'index': 'player_id'})

df_chair_features.to_csv('data/chair_features.csv', index=False)

Exploration.py

Lines changed: 18 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,123 +1,39 @@
11
import numpy as np
22
import pandas as pd
33
import matplotlib.pyplot as plt
4+
import seaborn as sns
45
import os
56
from utils import normalize_MPU9250_data, split_df
67
from ChairAnalyzer import ChairAnalyser
78

89
plt.interactive(True)
910
pd.options.display.max_columns = 15
1011
pic_prefix = 'pic/'
11-
data_path = 'data/CSV'
1212

13-
folders = os.listdir(data_path)
14-
folders = [f"{data_path}/{folder}" for folder in folders if not folder.startswith('.')]
1513

16-
data_dict_dict = {}
14+
# Load the per-chunk chair features and the player table, then join them on
# 'player_id' (default join type of `merge`).
df_chair_features = pd.read_csv('data/chair_features.csv')
df_players = pd.read_csv('data/players.csv')
df_chair_features4players = df_chair_features.merge(df_players, on='player_id')
1717

18-
chair_data_columns = ['time', 'acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'mag_x', 'mag_y', 'mag_z']
1918

20-
data_sources_list = ['schairlog'] # List sources for analysis here
21-
22-
for folder in folders:
23-
data_dict = {}
24-
name = folder.split('/')[-1]
25-
26-
files = os.listdir(folder)
27-
files = [file for file in files if not file.startswith('.')]
28-
data_sources = [file.split('_')[0] for file in files] # There are might be repetitions
29-
print(data_sources)
30-
31-
for file, data_source in zip(files, data_sources):
32-
if data_source not in data_sources_list:
33-
continue
34-
35-
try:
36-
df = pd.read_csv(folder + '/' + file)
37-
38-
if data_source in data_dict: # If already in dict it's appended
39-
new_df = pd.concat([data_dict[data_source], df], axis=0).reset_index(drop=True)
40-
data_dict[data_source] = new_df
41-
else:
42-
data_dict[data_source] = df
43-
except:
44-
pass
45-
46-
data_dict_dict[name] = data_dict
47-
48-
chair_data_dict = {}
49-
50-
for key, value in data_dict_dict.items():
51-
key = key.replace('\t', ' ')
52-
if 'schairlog' in value:
53-
df_chair = value['schairlog']
54-
chair_data_dict[key] = df_chair
55-
print(len(df_chair))
56-
57-
# keys = list(data_dict_dict.keys())
58-
# data_dict_dict[keys[0]]['schairlog']
59-
60-
61-
nonstationary_values_portion_list = []
62-
# TODO: do not draw pictures
63-
64-
for player_name, df_chair in chair_data_dict.items():
65-
chunk_lenght = 100 * 300
66-
df_chunks_list = split_df(df_chair, n_chunks=10, chunk_lenght=chunk_lenght)
67-
print(len(df_chunks_list))
68-
# chair_analyser = ChairAnalyser(df_chair, 0.01, pic_prefix, name=player_name) # + f'_{n_chunk}')
69-
# chair_analyser.plot_measurements_timeline(sensors=('acc', 'gyro'), plot_suptitle=False, fontsize=22)
70-
71-
for n_chunk, df_chunk in enumerate(df_chunks_list):
72-
chair_analyser = ChairAnalyser(df_chunk, 0.01, pic_prefix, name=player_name)# + f'_{n_chunk}')
73-
# chair_analyser.plot_measurements_timeline(sensors=('acc', 'gyro'))
74-
nonstationary_values_portion = chair_analyser.get_nonstationary_values_portion()
75-
lean_back_portion = chair_analyser.get_lean_back_portion()
76-
oscillations = chair_analyser.get_oscillation_intensity()
77-
78-
nonstationary_values_portion = nonstationary_values_portion.append(lean_back_portion)
79-
nonstationary_values_portion = nonstationary_values_portion.append(oscillations)
80-
81-
nonstationary_values_portion_list.append(nonstationary_values_portion)
82-
83-
84-
df_nonstationary_values_portion = pd.DataFrame(nonstationary_values_portion_list)
85-
df_nonstationary_values_portion.reset_index(inplace=True)
86-
df_nonstationary_values_portion.rename(columns={'index': 'player_name'}, inplace=True)
87-
88-
df_players = pd.read_csv('../data/participants2_fixed.csv', sep=';')
89-
df_players['player_name'] = df_players[['First Name', 'Last Name']].apply(lambda x: ' '.join(x), axis=1)
90-
91-
df_players.rename(columns={
92-
' What experience do u have in shooter games (Counter-Strike, Doom, Battlefield, etc.)?': 'Skill'
93-
},
94-
inplace=True,
19+
# Correlation heatmap between chair features and player attributes.
plt.close()
plt.figure(figsize=(15, 15))
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Fix: this used an undefined name `df` (NameError when run as a script).
# `df_chair_features4players` is the merged features/players frame defined in
# this file and the only frame that matches the plot title.
# NOTE(review): recent pandas raises in `.corr()` if non-numeric columns are
# present -- confirm the merged frame is numeric apart from 'player_id'.
corr_data = df_chair_features4players.drop(['player_id'], axis=1).corr()
corr_data = corr_data.round(1)  # one decimal keeps the cell annotations readable
sns.heatmap(
    corr_data, square=True, cmap=cmap, vmax=1, vmin=-1, linewidths=.2, cbar_kws={"shrink": .8},
    annot=True, annot_kws={"size": 7},
    # xticklabels=False, yticklabels=False,
)
96-
97-
df_players = df_players[['player_name', 'Skill']]
98-
skill_is_none = df_players['Skill'] == 'None'
99-
df_players.loc[skill_is_none, 'Skill'] = 'Small'
100-
101-
102-
103-
104-
105-
106-
107-
108-
109-
110-
111-
112-
113-
114-
115-
116-
117-
29+
# Title, lay out and save the current figure.
plt.title('Correlation between player skill and his behaviour on the chair', fontsize=15)
plt.tight_layout()
plt.savefig('pic/heatmap_10_3.png')


# Fix: this was `df.shape`, but no `df` is defined in this script; the merged
# features/players frame is the one being explored here.
df_chair_features4players.shape
12035

36+
# TODO: make hours binary for multiple thresholds
12137

12238

12339

GameLogProcessing.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
# -*- coding: utf-8 -*-
2+
import numpy as np
3+
import pandas as pd
4+
import matplotlib.pyplot as plt
5+
import os
6+
import json
7+
import joblib
8+
from utils import normalize_MPU9250_data, split_df, string2json
9+
from ChairAnalyzer import ChairAnalyser
10+
11+
plt.interactive(True)
12+
pd.options.display.max_columns = 15
13+
pic_prefix = 'pic/'
14+
data_path = 'Anonimised Data/Data'
15+
processed_data_path = 'data/players_data_processed'
16+
17+
data_dict = joblib.load('data/data_dict')
18+
19+
def check_player_is_killed(parameters_dict):
    """Tell whether an event's ``userid`` refers to the studied player.

    The caller combines this with a ``health == 0`` mask, so on those rows a
    ``True`` result means the dying player is the studied one.

    Args:
        parameters_dict: parsed event parameters (a mapping).

    Returns:
        ``True`` if ``parameters_dict`` has a string ``'userid'`` containing
        ``'koltech'``; ``False`` if the key is missing or its value is not a
        string (previously a non-string value raised ``AttributeError``).
    """
    if 'userid' not in parameters_dict:
        return False

    userid = parameters_dict['userid']
    # Matching on 'koltech' (without the first letter) accepts both
    # 'Skoltech' and 'skoltech'.
    return isinstance(userid, str) and 'koltech' in userid
23+
24+
25+
# Per-player kill/death timestamps extracted from the game logs.
gamedata_dict = {}

for player_id, player_data_dict in data_dict.items():
    if 'gamelog' not in player_data_dict:
        continue

    df_gamelog_raw = player_data_dict['gamelog']

    # Keep only events that mention the studied player. `na=False` treats rows
    # with missing parameters as non-matching instead of crashing on them.
    mask_player_events = df_gamelog_raw['parameters'].str.contains('koltech', regex=False, na=False)

    # Explicit copy: columns are assigned below, and assigning on a slice of the
    # stored frame triggers SettingWithCopyWarning / ambiguous write-through.
    df_gamelog = df_gamelog_raw.loc[mask_player_events, :].copy()
    df_gamelog['parameters'] = df_gamelog['parameters'].apply(string2json)
    df_gamelog['health'] = df_gamelog['parameters'].apply(lambda x: int(x['health']) if 'health' in x else None)

    # health == 0 marks an event in which somebody dies.
    mask_somebody_is_killed = df_gamelog['health'] == 0

    # A death either belongs to the studied player or is attributed to them as a kill.
    mask_player_is_killed = mask_somebody_is_killed & df_gamelog.loc[:, 'parameters'].apply(check_player_is_killed)
    mask_player_kills = mask_somebody_is_killed & ~mask_player_is_killed

    times_is_killed = df_gamelog.loc[mask_player_is_killed, 'time'].values
    times_kills = df_gamelog.loc[mask_player_kills, 'time'].values

    gamedata_dict[player_id] = {
        'times_is_killed': times_is_killed,
        'times_kills': times_kills,
    }


joblib.dump(gamedata_dict, 'data/gamedata_dict')
57+
58+
59+
60+
61+
62+
data_dict['2']['gamelog']['parameters']
63+
64+
65+
66+
67+
68+
69+
# with open(gamelog_path, 'rb') as f:
70+
# gamelog = f.readlines()
71+
#
72+
# # gamelog = [string.decode() for string in gamelog]
73+
# gamelog_lenght_initial = len(gamelog)
74+
# gamelog = [string for string in gamelog if string.find(b'koltech') != -1]
75+
# gamelog_lenght_filtered = len(gamelog)
76+
# print(f'gamelog_lenght_initial = {gamelog_lenght_initial}, gamelog_lenght_filtered = {gamelog_lenght_filtered}')
77+
#
78+
# # with open('tmp/gamelog.csv', 'wb') as f:
79+
# with open(processed_data_path + '/gamelog.csv', 'wb') as f:
80+
# # for line in gamelog:
81+
# # f.write(line)
82+
# f.writelines(gamelog)
83+
#
84+
# df_gamelog = pd.read_csv('tmp/gamelog.csv', header=None)
85+
86+
87+
88+
89+
90+
91+
92+
93+
94+
95+
96+
97+
98+
99+
100+
101+
# Exploratory checks on `df_gamelog` as left by the per-player loop
# (i.e. the last player that had a gamelog).
#
# Fix: this section referenced `mask_killed` and a 'health_is_0' column, neither
# of which is defined anywhere in this script.
# NOTE(review): they are mapped to `mask_somebody_is_killed` and
# `df_gamelog['health'] == 0`, the closest existing equivalents -- confirm that
# "any death" (rather than only the player's own deaths) was intended.
times = pd.to_datetime(df_gamelog.loc[mask_somebody_is_killed, 'time'])
np.diff(times.values) / 10 ** 9  # gaps between consecutive death events

(times.iloc[1:] - times.iloc[:-1].values).iloc[10]

(df_gamelog['health'] == 0).sum()

plt.plot(df_gamelog['health'] == 0)

# TODO: check player behaviour right after death

df_gamelog['event'].value_counts()

mask_fire = df_gamelog['event'] == 'weapon_fire'

df_gamelog.loc[mask_fire, 'parameters']
fire_times = pd.to_datetime(df_gamelog.loc[mask_fire, 'time'])
(fire_times.values[1:] - fire_times.values[:-1]).min()  # shortest gap between shots

# One column per parameter key of the 'weapon_fire' events.
df_fire = pd.DataFrame(list(df_gamelog.loc[mask_fire, 'parameters'].values))

df_fire.info()
130+
131+
132+
133+

GeneralDataProcessing.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import numpy as np
2+
import pandas as pd
3+
import matplotlib.pyplot as plt
4+
import os
5+
import joblib
6+
from utils import normalize_MPU9250_data
7+
8+
plt.interactive(True)
9+
pd.options.display.max_columns = 15
10+
pic_prefix = 'pic/'
11+
# data_path = 'data/CSV'
12+
data_path = 'Anonimised Data/Data'
13+
14+
# Collect every player's raw CSV logs into {player_id: {data_source: DataFrame}}
# and persist the result with joblib.
player_folders = os.listdir(data_path)
player_folders = [
    f"{data_path}/{folder}"
    for folder in player_folders
    # Skip hidden entries and stray files: only directories can be listed below.
    if not folder.startswith('.') and os.path.isdir(f"{data_path}/{folder}")
]

data_dict = {}

data_sources_list = ['schairlog', 'gamelog']  # List sources for analysis here

# chair_data_columns = ['time', 'acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'mag_x', 'mag_y', 'mag_z']

for player_folder in player_folders:
    player_data_dict = {}
    player_id = player_folder.split('/')[-1]  # the folder name is the player id

    player_files = os.listdir(player_folder)
    player_files = [file for file in player_files if not file.startswith('.')]
    # The data source is the file-name prefix before the first '_';
    # several files may share one source.
    player_data_sources = [file.split('_')[0] for file in player_files]

    for file, data_source in zip(player_files, player_data_sources):
        if data_source not in data_sources_list:
            continue

        try:
            df = pd.read_csv(player_folder + '/' + file)

            if data_source in player_data_dict:
                # Further files of an already-seen source are appended to it.
                player_data_dict[data_source] = pd.concat(
                    [player_data_dict[data_source], df], axis=0,
                ).reset_index(drop=True)
            else:
                player_data_dict[data_source] = df
        except Exception as error:
            # Still best-effort (one bad file must not abort the run), but the
            # failure is reported and KeyboardInterrupt/SystemExit are no
            # longer swallowed as they were by the bare `except: pass`.
            print(f'Skipping {player_folder}/{file}: {error!r}')

    # Sorting by time and fixing naming
    for data_source, df_source in player_data_dict.items():
        if data_source == 'gamelog':
            # In the gamelog the time column is read in as 'Unnamed: 0'.
            df_source.rename(columns={'Unnamed: 0': 'time'}, inplace=True)

        df_source.sort_values(by='time', inplace=True)
        df_source.reset_index(drop=True, inplace=True)

    data_dict[player_id] = player_data_dict

joblib.dump(data_dict, 'data/data_dict')
58+
59+

0 commit comments

Comments
 (0)