11import sys
22from string import ascii_lowercase
33import nltk
4- from nltk import word_tokenize
54from nltk import bigrams
65import string
76import re
7+ import os
88import csv
99from nltk .stem .snowball import SnowballStemmer
10+ from ExtraPreProc import remove_stop_words
1011# reload(sys)
1112# sys.setdefaultencoding('utf8')
1213
14+ bidict = {}
15+ tridict = {}
1316
1417def getSentiStrength (w ):
1518stemmer = SnowballStemmer ("english" ,ignore_stopwords = True )
@@ -51,8 +54,7 @@ def contrastingFeatures(words):
5154sentiscores = []
5255bigrams = []
5356trigrams = []
54- bidict = {}
55- tridict = {}
57+
5658poscount = 0
5759possum = 0
5860negcount = 0
@@ -77,61 +79,80 @@ def contrastingFeatures(words):
7779trigrams .append (new_words [c ]+ new_words [c + 1 ]+ new_words [c + 2 ])
7880c = c + 1
7981
80- with open ('bigramscores.csv' ,'r' ) as file2 :
81- for line in file2 :
82- temp = line .split ()
83- bidict [temp [0 ]]= float (temp [1 ])
84- file2 .close ()
85-
86- with open ('trigramscores.csv' ,'r' ) as file2 :
87- for line in file2 :
88- temp = line .split ()
89- tridict [temp [0 ]]= float (temp [1 ])
90- file2 .close ()
91-
9282for bi in bigrams :
9383if bi in bidict :
9484if bidict [bi ]> 0 :
95- possum += bidict [bi ]
85+ possum += float ( bidict [bi ])
9686poscount = poscount + 1
97- print "here1"
87+ # print "here1"
9888else :
99- negsum += bidict [bi ]
89+ negsum += float ( bidict [bi ])
10090negcount = negcount + 1
10191
10292for tri in trigrams :
10393if tri in tridict :
10494if tridict [tri ]> 0 :
105- possum += tridict [tri ]
95+ possum += float ( tridict [tri ])
10696poscount = poscount + 1
107- # print(poscount)
108- # print(possum)
97+ # print "here2"
98+
10999else :
110- negsum += tridict [tri ]
100+ negsum += float ( tridict [tri ])
111101negcount = negcount + 1
112102
113103delta_affect = (max (affectscores ) - min (affectscores ))
114104delta_sentiment = (max (sentiscores )- min (sentiscores ))
115- print (poscount )
116- print (possum )
117- print (negcount )
118- print (negsum )
119-
120- print ((trigrams ))
121- # print(len(bidict))
122105
123106output = [delta_affect , delta_sentiment , poscount , possum , negcount , negsum ]
124107
125108output_file = open ("feature1.csv" , "w" );
126109writer = csv .writer (output_file )
127110writer .writerow (output )
111+ print output
128112
129113return output
130114
131115
def writeFile(folder):
    """Run contrastingFeatures on the tweet stored in every file of *folder*.

    Files are visited in sorted name order. Each file is parsed as CSV and
    the tweet text is read from the second row, third column. Stop words
    are stripped with remove_stop_words, the remaining tokens are re-joined
    with single spaces, and the result is printed and passed to
    contrastingFeatures (whose return value is discarded here).

    Args:
        folder: path of the directory containing the per-tweet CSV files.

    Raises:
        IndexError: if a file has fewer than two rows, or its second row
            has fewer than three columns.
    """
    for f in sorted(os.listdir(folder)):
        # Use a context manager so each handle is closed before the next
        # file is opened; the folder may contain many files.
        with open(os.path.join(folder, f), "r") as inputFile:
            rows = list(csv.reader(inputFile))
        tweet = rows[1][2]
        tweet = " ".join(remove_stop_words(tweet))
        # Parenthesised single-argument form prints identically under
        # Python 2 and Python 3.
        print(tweet)
        contrastingFeatures(tweet)
126+
def _load_scores(path, table):
    """Read ``key,score`` lines from *path* into the dict *table*."""
    with open(path, 'r') as score_file:
        for line in score_file:
            fields = line.split(",")
            # float() ignores surrounding whitespace, so the trailing
            # newline left on the last field is harmless.
            table[fields[0]] = float(fields[1])


def main():
    """Load the n-gram score tables, then process both tweet folders.

    Fills the module-level ``bidict`` from ``bigramscores.csv`` and
    ``tridict`` from ``trigramscores.csv`` (both read relative to the
    current working directory), then calls writeFile on the
    ``normal_with_past_PP`` and ``sarcastic_with_past`` directories under
    the current working directory.

    Raises:
        IndexError: if a score line contains no comma.
        ValueError: if a score field is not a valid float.
    """
    _load_scores('bigramscores.csv', bidict)
    # Trigram scores must go into tridict: contrastingFeatures looks
    # trigrams up there, so loading them into bidict left it empty.
    _load_scores('trigramscores.csv', tridict)

    pwd = os.getcwd()
    normal = os.path.join(pwd, "normal_with_past_PP")
    sarcastic = os.path.join(pwd, "sarcastic_with_past")
    writeFile(normal)
    writeFile(sarcastic)
151+
152+
153+
154+ if __name__ == "__main__" :
155+ main ()
132156
133- # tweet = "i love getting spam mails"
134157
135- # contrastingFeatures(tweet)
136- # bigram_score(tweet)
137158# function to read tweets from preprocessed data and pass to contrastingFeatures()
0 commit comments