Skip to content

Commit e769a74

Browse files
committed
Term Project codes added
1 parent 1e85594 commit e769a74

File tree

2 files changed

+480
-0
lines changed

2 files changed

+480
-0
lines changed
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
import pandas as pd
2+
from datetime import datetime
3+
import os
4+
import numpy as np
5+
6+
7+
# Location of the data: every log folder is built as
# datapath_begin + <name>_<roll number> + datapath_end + <mood folder>
datapath_begin = 'New Data Collection/'
datapath_end = '/Keyboard Database/sentence/'

# Users whose data is considered, keyed by the integer used as class label:
# label -> [name, roll number]
user_details = {
    0: ['Lalit', '14EC10025'],
    1: ['Koruprolu Asish', '14EC10024'],
    2: ['Arnab', '14EC35031'],
    3: ['Sandeep', '14EC35033'],
    4: ['Charu', '14EC35003'],
}
14+
15+
16+
'''To read the keystroke log files and extract data from them '''
17+
##############################################
18+
19+
def extract_data(username,roll_number,mood,cont=False):
    '''Collect per-key timing features from every ".txt" log of one user.

    The logs are searched under
    datapath_begin + username + '_' + roll_number + datapath_end + mood + '/'
    (sub-folders included).

    Parameters
    ----------
    username, roll_number : str
        Identify the user's folder.
    mood : str
        Sub-folder holding the logs (e.g. 'Neutral').
    cont : bool
        True when the logs use the "continuous" layout, where the event, key
        and timestamp are the 1st, 2nd and 8th whitespace-separated fields
        and the timestamp format is '%d:%H:%M:%S.%f'.

    Returns
    -------
    pandas.DataFrame with columns ['key', 'latency', 'press_time'], one row
    per completed key press (in key-release order):
      * 'latency'    - microseconds between KeyDown and KeyUp of the key;
      * 'press_time' - microseconds between the KeyDown of this row and the
                       KeyDown of the previous row; 0 for the first row of
                       each file. It can be negative when keys are released
                       in a different order than they were pressed.
    '''
    path = datapath_begin + username + '_' + roll_number + datapath_end + mood + '/'
    directory = os.path.join(path)
    print('Collecting data of '+username+'...')

    if cont:
        fmt = '%d:%H:%M:%S.%f'
    else:
        fmt = '%d:%m:%Y:%H:%M:%S.%f'

    # Rows are gathered in plain lists and turned into a frame once, which
    # avoids the quadratic (and, since pandas 2.0, removed) DataFrame.append.
    keys = []
    latencies = []
    press_times = []
    for root, _dirs, files in os.walk(directory):
        for file_name in files:
            if not file_name.endswith(".txt"):
                continue
            # Join with `root` so that files found in sub-folders open too.
            holds = _read_key_holds(os.path.join(root, file_name), fmt, cont)
            previous_press = None
            for key, pressed_at, held in holds:
                if previous_press is None:
                    gap = 0.0
                else:
                    gap = _elapsed_microseconds(previous_press, pressed_at, fmt)
                keys.append(key)
                latencies.append(held)
                press_times.append(gap)
                previous_press = pressed_at

    # Explicit column order: positional consumers of the frame must not
    # depend on how the installed pandas orders dict keys.
    return pd.DataFrame({'key': keys,
                         'latency': latencies,
                         'press_time': press_times},
                        columns=['key', 'latency', 'press_time'])


def _elapsed_microseconds(start, end, fmt):
    '''Return the full signed time from `start` to `end` (both strings in `fmt`) in microseconds.'''
    delta = datetime.strptime(end, fmt) - datetime.strptime(start, fmt)
    # timedelta.microseconds is only the sub-second component; days and
    # seconds have to be added to obtain the whole duration.
    return float((delta.days * 86400 + delta.seconds) * 10 ** 6 + delta.microseconds)


def _parse_log_line(line, cont):
    '''Split one log line into (event, key, timestamp); return None when the line is malformed.'''
    tokens = line.split()
    if cont:
        if len(tokens) < 8:
            return None
        return tokens[0], tokens[1], tokens[7]

    if len(tokens) == 3:
        event, key, stamp = tokens
        if key == '\b':
            key = "backspace"
    elif len(tokens) == 2:
        # No key field survived split(); such lines are recorded as "space"
        # (presumably the space bar, which split() swallows).
        event, stamp = tokens
        key = "space"
    else:
        return None

    # Make the timestamp match '%d:%m:%Y:%H:%M:%S.%f': the separator before
    # the last three digits becomes '.', and '20' is inserted at index 6 to
    # expand the two-digit year.
    stamp = stamp[:-4] + '.' + stamp[-3:]
    stamp = stamp[:6] + '20' + stamp[6:]
    return event, key, stamp


def _read_key_holds(file_path, fmt, cont, max_lines=5000):
    '''Return [(key, press_timestamp, hold_microseconds), ...] for one log file, in key-release order.'''
    holds = []
    pending = {}  # key currently held down -> timestamp of its first KeyDown
    with open(file_path, "r") as log:
        for line_number, line in enumerate(log, 1):
            parsed = _parse_log_line(line, cont)
            if parsed is None:
                # Skip the line instead of reusing the previous line's values.
                print("Error in Data" + os.path.basename(file_path))
                continue
            event, key, stamp = parsed
            if event == 'KeyDown':
                # A repeated KeyDown for a key that is still held keeps the
                # time of the first press.
                pending.setdefault(key, stamp)
            elif event == 'KeyUp' and key in pending:
                pressed_at = pending.pop(key)
                holds.append((key, pressed_at,
                              _elapsed_microseconds(pressed_at, stamp, fmt)))
            # Reading stops once more than `max_lines` lines were consumed.
            if line_number > max_lines:
                break
    return holds
117+
118+
#####################################################
119+
120+
121+
122+
123+
'''To remove outliers from the dataset '''
124+
##############################################
125+
126+
def remove_outlier(df_in, col_name, iqr_factor=1.5):
    '''Return the rows of `df_in` whose `col_name` value lies strictly inside the IQR fences.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input data; it is not modified.
    col_name : str
        Numeric column used to decide which rows are outliers.
    iqr_factor : float
        How many interquartile ranges the fences sit below the first and
        above the third quartile (default 1.5).

    Returns
    -------
    pandas.DataFrame with the retained rows (original index labels kept).
    Values lying exactly on a fence are dropped, so a column whose
    interquartile range is 0 yields an empty result.
    '''
    # Quartiles are undefined without rows; there is nothing to filter.
    if len(df_in) == 0:
        return df_in.copy()

    values = df_in[col_name]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1  # Interquartile range
    fence_low = q1 - iqr_factor * iqr
    fence_high = q3 + iqr_factor * iqr
    inside = (values > fence_low) & (values < fence_high)
    return df_in.loc[inside]
134+
135+
###############################################
136+
137+
138+
139+
140+
141+
142+
'''To build the feature matrix and the class labels for all users '''
143+
##############################################
144+
145+
def get_data(n_classes,emotion='Happy',neutral=False,cont=False):
    '''Build the feature matrix and label vector for the first `n_classes` users.

    Parameters
    ----------
    n_classes : int
        Number of users to load; user `i` is user_details[i] and gets label `i`.
    emotion : str
        Emotion sub-folder, used only when neither `neutral` nor `cont` is set.
    neutral : bool
        Read the 'Neutral' folder (takes precedence over `cont`).
    cont : bool
        Read the 'Continuous' folder; also forwarded to extract_data.

    Returns
    -------
    (X_data, Y_data) : numpy arrays of shape (n_samples, 2) and (n_samples, 1).
        X_data[:, 0] is the 'latency' column and X_data[:, 1] the 'press_time'
        column of extract_data, both divided by 1000 (microseconds to
        milliseconds). Y_data holds the user label of each sample as a float.
        Every sample that survives the outlier removal is returned; both
        arrays have zero rows when there is no data.
    '''
    # The folder depends only on the arguments, so it is resolved once.
    if neutral:
        fldr_name = 'Neutral'
    elif cont:
        fldr_name = 'Continuous'
    else:
        fldr_name = 'Emotional/' + emotion

    feature_chunks = []
    label_chunks = []
    for label in range(n_classes):
        # Extracting raw data from the files
        raw = extract_data(user_details[label][0], user_details[label][1], fldr_name, cont)
        # Rows with press_time == 0 (the first key of every file) are discarded.
        raw = raw.loc[raw['press_time'] != 0]
        # Removes the outliers from the data with respect to both the attributes
        clean = remove_outlier(remove_outlier(raw, 'press_time'), 'latency')
        # Columns are picked by name so the result does not depend on the
        # column order of the frame; 1000.0 forces true division everywhere.
        features = clean[['latency', 'press_time']].values.astype(float) / 1000.0
        feature_chunks.append(features)
        label_chunks.append(np.full(len(features), label, dtype=float))

    if not feature_chunks:
        return np.zeros((0, 2)), np.zeros((0, 1))

    X_data = np.vstack(feature_chunks)
    Y_data = np.concatenate(label_chunks).reshape(-1, 1)
    return X_data, Y_data
181+
182+
###############################################
183+
184+
185+
186+
187+
188+
189+
'''Main function to verify the Feature extraction '''
190+
#####################################################
191+
192+
193+
if __name__ == '__main__':
    # Smoke test of the feature extraction: load the continuous-typing data
    # of every configured user and report the shapes of the resulting arrays.
    n_classes = len(user_details)
    X_cont, Y_cont = get_data(n_classes, cont=True)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(X_cont.shape)
    print(Y_cont.shape)

0 commit comments

Comments
 (0)