Skip to content
This repository was archived by the owner on Dec 28, 2018. It is now read-only.

Commit bf6938f

Browse files
authored
Merge pull request #11 from akankshadara/master
modified to add remove_stopwords()
2 parents c23a1a6 + 760049e commit bf6938f

File tree

1 file changed

+53
-32
lines changed

1 file changed

+53
-32
lines changed

feature1.py

Lines changed: 53 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
import sys
22
from string import ascii_lowercase
33
import nltk
4-
from nltk import word_tokenize
54
from nltk import bigrams
65
import string
76
import re
7+
import os
88
import csv
99
from nltk.stem.snowball import SnowballStemmer
10+
from ExtraPreProc import remove_stop_words
1011
# reload(sys)
1112
# sys.setdefaultencoding('utf8')
1213

14+
bidict={}
15+
tridict={}
1316

1417
def getSentiStrength(w):
1518
stemmer = SnowballStemmer("english",ignore_stopwords=True)
@@ -51,8 +54,7 @@ def contrastingFeatures(words):
5154
sentiscores=[]
5255
bigrams = []
5356
trigrams = []
54-
bidict={}
55-
tridict={}
57+
5658
poscount=0
5759
possum=0
5860
negcount=0
@@ -77,61 +79,80 @@ def contrastingFeatures(words):
7779
trigrams.append(new_words[c]+new_words[c+1]+new_words[c+2])
7880
c=c+1
7981

80-
with open('bigramscores.csv','r') as file2:
81-
for line in file2:
82-
temp = line.split()
83-
bidict[temp[0]]=float(temp[1])
84-
file2.close()
85-
86-
with open('trigramscores.csv','r') as file2:
87-
for line in file2:
88-
temp = line.split()
89-
tridict[temp[0]]=float(temp[1])
90-
file2.close()
91-
9282
for bi in bigrams:
9383
if bi in bidict:
9484
if bidict[bi]>0:
95-
possum+= bidict[bi]
85+
possum+= float(bidict[bi])
9686
poscount=poscount + 1
97-
print "here1"
87+
# print "here1"
9888
else:
99-
negsum+=bidict[bi]
89+
negsum+= float(bidict[bi])
10090
negcount=negcount+1
10191

10292
for tri in trigrams:
10393
if tri in tridict:
10494
if tridict[tri]>0:
105-
possum+= tridict[tri]
95+
possum+= float(tridict[tri])
10696
poscount=poscount+1
107-
# print(poscount)
108-
# print(possum)
97+
# print "here2"
98+
10999
else:
110-
negsum+=tridict[tri]
100+
negsum+=float(tridict[tri])
111101
negcount=negcount+1
112102

113103
delta_affect = (max(affectscores) - min(affectscores))
114104
delta_sentiment= (max(sentiscores)-min(sentiscores))
115-
print(poscount)
116-
print(possum)
117-
print(negcount)
118-
print(negsum)
119-
120-
print((trigrams))
121-
# print(len(bidict))
122105

123106
output = [delta_affect, delta_sentiment, poscount, possum, negcount, negsum]
124107

125108
output_file = open("feature1.csv", "w");
126109
writer = csv.writer(output_file)
127110
writer.writerow(output)
111+
print output
128112

129113
return output
130114

131115

116+
def writeFile(folder):
    """Run contrastingFeatures() on the tweet stored in every file of *folder*.

    Files are visited in sorted name order. Each one is parsed as CSV and the
    tweet is taken from the third column of the second row (``rows[1][2]``),
    stop words are stripped with remove_stop_words(), and the re-joined text
    is printed and passed to contrastingFeatures().

    NOTE(review): presumably row 0 is a header row and column 2 holds the
    tweet text -- confirm against the files that produce these folders.

    Raises:
        IndexError: if a file has fewer than two rows, or its second row has
            fewer than three columns.
    """
    for f in sorted(os.listdir(folder)):
        # Use a context manager so the handle is released after each file;
        # previously every file in the folder stayed open until exit.
        with open(os.path.join(folder, f), "r") as inputFile:
            rows = list(csv.reader(inputFile))

        tweet = rows[1][2]
        tweet = " ".join(remove_stop_words(tweet))
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is also valid on Python 3.
        print(tweet)
        contrastingFeatures(tweet)
126+
127+
def _load_ngram_scores(path, table):
    """Fill *table* in place with ``ngram -> float score`` pairs from *path*.

    Each non-blank line is expected to be ``ngram,score[,...]``; only the
    first two comma-separated fields are used.
    """
    with open(path, 'r') as score_file:
        for line in score_file:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing with IndexError on the missing score field.
            if not line.strip():
                continue
            fields = line.split(",")
            # float() ignores the trailing newline left on the last field.
            table[fields[0]] = float(fields[1])


def main():
    """Load the n-gram score tables, then process both tweet folders.

    Bigram scores go into the module-level ``bidict`` and trigram scores
    into the module-level ``tridict``. The tables are mutated in place, so
    no ``global`` statement is needed. writeFile() is then run on the
    ``normal_with_past_PP`` and ``sarcastic_with_past`` folders under the
    current working directory.
    """
    _load_ngram_scores('bigramscores.csv', bidict)
    # Trigram scores must land in tridict. They were previously written into
    # bidict, which left tridict empty so no trigram was ever matched.
    _load_ngram_scores('trigramscores.csv', tridict)

    pwd = os.getcwd()
    normal = os.path.join(pwd, "normal_with_past_PP")
    sarcastic = os.path.join(pwd, "sarcastic_with_past")
    writeFile(normal)
    writeFile(sarcastic)
151+
152+
153+
154+
if __name__ == "__main__":
155+
main()
132156

133-
# tweet = "i love getting spam mails"
134157

135-
# contrastingFeatures(tweet)
136-
# bigram_score(tweet)
137158
# function to read tweets from preprocessed data and pass to contrastingFeatures()

0 commit comments

Comments
 (0)