@@ -55,11 +55,17 @@ def parse_parser_results(text):
5555 av = re .split ("=| " , s )
5656 # make [ignore,ignore,a,b,c,d] into [[a,b],[c,d]]
5757 # and save as attr-value dict, convert numbers into ints
58- tmp ['words' ].append ((av [1 ], dict (zip (* [av [2 :][x ::2 ] for x in (0 , 1 )]))))
58+ # tmp['words'].append((av[1], dict(zip(*[av[2:][x::2] for x in (0, 1)]))))
5959 # tried to convert digits to ints instead of strings, but
6060 # it seems the results of this can't be serialized into JSON?
61- # av = zip(*[av[2:][x::2] for x in (0, 1)])
62- # tmp['words'][av[1]] = dict(map(lambda x: (x[0], x[1].isdigit() and int(x[1]) or x[1]), av))
61+ word = av [1 ]
62+ attributes = {}
63+ for a ,v in zip (* [av [2 :][x ::2 ] for x in (0 , 1 )]):
64+ if v .isdigit ():
65+ attributes [a ] = int (v )
66+ else :
67+ attributes [a ] = v
68+ tmp ['words' ].append ((word , attributes ))
6369 state = 3
6470 elif state == 3 :
6571 # skip over parse tree
@@ -72,12 +78,22 @@ def parse_parser_results(text):
7278 if not line .startswith (" " ) and line .endswith (")" ):
7379 split_entry = re .split ("\(|, " , line [:- 1 ])
7480 if len (split_entry ) == 3 :
75- rel , left , right = map (lambda x : remove_id ( x ) , split_entry )
81+ rel , left , right = map (lambda x : x , split_entry )
7682 tmp ['tuples' ].append (tuple ([rel ,left ,right ]))
7783 elif "Coreference links" in line :
7884 state = 5
7985 elif state == 5 :
80- # coreference links. Not yet implemented
86+ crexp = re .compile ('\s(\d*)\s(\d*)\s\-\>\s(\d*)\s(\d*), that is' )
87+ matches = crexp .findall (line )
88+ for src_i , src_pos , sink_i , sink_pos in matches :
89+ print "COREF MATCH" , src_i , sink_i
90+ src = tmp ['words' ][int (src_pos )- 1 ][0 ]
91+ sink = tmp ['words' ][int (sink_pos )- 1 ][0 ]
92+ if tmp .has_key ('coref' ):
93+ tmp ['coref' ].append ((src , sink ))
94+ else :
95+ tmp ['coref' ] = [(src , sink )]
96+
8197 print "CR" , line
8298 if len (tmp .keys ()) != 0 :
8399 results .append (tmp )
@@ -191,8 +207,9 @@ def _parse(self, text, verbose=True):
def _debug_parse(self, text, verbose=True):
    """Debugging stand-in for a real parse: load results from ``test.out``.

    Neither ``text`` nor ``verbose`` is used; they are accepted so this
    method can be called with the same arguments as ``_parse``.  The whole
    of ``test.out`` (resolved against the current working directory) is
    read and handed to ``parse_parser_results``.

    Returns whatever ``parse_parser_results`` produces for the file's text.
    Raises IOError (OSError on Python 3) if ``test.out`` cannot be opened.
    """
    # Parenthesised form prints the same text under Python 2 and Python 3.
    print("DEBUG PARSE -- ")
    # ``with`` guarantees the handle is closed even if reading raises.
    with open("test.out", 'r') as rf:
        incoming = rf.read()
    return parse_parser_results(incoming)
197214
198215 def parse (self , text , verbose = True ):
@@ -220,7 +237,12 @@ def parse_imperative(self, text, verbose=True):
220237 used_pronoun = None
221238 pronouns = ["you" ,"he" , "she" ,"i" ]
222239 for p in pronouns :
240+ if text .startswith (p + " " ):
241+ # text already begins with a pronoun, so no dummy pronoun is needed
242+ used_pronoun = None
243+ break
223244 if p not in text :
245+ # found one not in there already
224246 used_pronoun = p
225247 break
226248 # if you can't find one, regress to original parse
@@ -229,19 +251,31 @@ def parse_imperative(self, text, verbose=True):
229251
230252 # create text with pronoun and parse it
231253 new_text = used_pronoun + " " + text .lstrip ()
232- result = self ._parse (new_text , verbose )
254+ result = self ._debug_parse (new_text , verbose )
255+
256+ if len (result ) != 1 :
257+ print "Non-imperative sentence? Multiple sentences found."
233258
234259 # remove the dummy pronoun
260+ used_pronoun_offset = len (used_pronoun )+ 1
235261 if result [0 ].has_key ('text' ):
236262 result [0 ]['text' ] = text
237263 result [0 ]['tuples' ] = filter (lambda x : not (x [1 ] == used_pronoun or x [2 ]
238264 == used_pronoun ), result [0 ]['tuples' ])
239265 result [0 ]['words' ] = result [0 ]['words' ][1 :]
266+ # account for offset
267+ ct = 0
268+ for word , av in result [0 ]['words' ]:
269+ for a ,v in av .items ():
270+ if a .startswith ("CharacterOffset" ):
271+ result [0 ]['words' ][ct ][1 ][a ] = v - used_pronoun_offset
272+ ct += 1
240273 return dumps (result )
241274 else :
242275 # if there's a timeout error, just return it.
243276 return dumps (result )
244277
278+
245279if __name__ == '__main__' :
246280 parser = optparse .OptionParser (usage = "%prog [OPTIONS]" )
247281 parser .add_option (
0 commit comments