jac2130
diff --git a/‎corenlp.py‎
Lines changed: 41 additions & 7 deletions b/‎corenlp.py‎
Lines changed: 41 additions & 7 deletions
@@ -55,11 +55,17 @@ def parse_parser_results(text):
  av = re.split("=| ", s) 
  # make [ignore,ignore,a,b,c,d] into [[a,b],[c,d]]
  # and save as attr-value dict, convert numbers into ints
- tmp['words'].append((av[1], dict(zip(*[av[2:][x::2] for x in (0, 1)]))))
+ #tmp['words'].append((av[1], dict(zip(*[av[2:][x::2] for x in (0, 1)]))))
  # tried to convert digits to ints instead of strings, but
  # it seems the results of this can't be serialized into JSON?
- # av = zip(*[av[2:][x::2] for x in (0, 1)]) 
- # tmp['words'][av[1]] = dict(map(lambda x: (x[0], x[1].isdigit() and int(x[1]) or x[1]), av))
+ word = av[1]
+ attributes = {}
+ for a,v in zip(*[av[2:][x::2] for x in (0, 1)]):
+ if v.isdigit():
+ attributes[a] = int(v)
+ else:
+ attributes[a] = v
+ tmp['words'].append((word, attributes))
  state = 3
  elif state == 3:
  # skip over parse tree
@@ -72,12 +78,22 @@ def parse_parser_results(text):
  if not line.startswith(" ") and line.endswith(")"):
  split_entry = re.split("\(|, ", line[:-1]) 
  if len(split_entry) == 3:
- rel, left, right = map(lambda x: remove_id(x), split_entry)
+ rel, left, right = map(lambda x: x, split_entry)
  tmp['tuples'].append(tuple([rel,left,right]))
  elif "Coreference links" in line:
  state = 5
  elif state == 5:
- # coreference links. Not yet implemented
+ crexp = re.compile('\s(\d*)\s(\d*)\s\-\>\s(\d*)\s(\d*), that is')
+ matches = crexp.findall(line)
+ for src_i, src_pos, sink_i, sink_pos in matches:
+ print "COREF MATCH", src_i, sink_i
+ src = tmp['words'][int(src_pos)-1][0]
+ sink = tmp['words'][int(sink_pos)-1][0]
+ if tmp.has_key('coref'):
+ tmp['coref'].append((src, sink))
+ else:
+ tmp['coref'] = [(src, sink)]
+ 
  print "CR", line
  if len(tmp.keys()) != 0:
  results.append(tmp)
@@ -191,8 +207,9 @@ def _parse(self, text, verbose=True):
  def _debug_parse(self, text, verbose=True):
  print "DEBUG PARSE -- "
  rf = open("test.out", 'r')
- results = rf.readlines()
+ incoming = ''.join(rf.readlines())
  rf.close()
+ results = parse_parser_results(incoming)
  return results
 
  def parse(self, text, verbose=True):
@@ -220,7 +237,12 @@ def parse_imperative(self, text, verbose=True):
  used_pronoun = None
  pronouns = ["you","he", "she","i"]
  for p in pronouns:
+ if text.startswith(p+" "):
+ # it's already an imperative!
+ used_pronoun = None
+ break
  if p not in text:
+ # found one not in there already
  used_pronoun = p
  break
  # if you can't find one, regress to original parse
@@ -229,19 +251,31 @@ def parse_imperative(self, text, verbose=True):
 
  # create text with pronoun and parse it
  new_text = used_pronoun+" "+text.lstrip()
- result = self._parse(new_text, verbose)
+ result = self._debug_parse(new_text, verbose)
+ 
+ if len(result) != 1:
+ print "Non-imperative sentence? Multiple sentences found."
 
  # remove the dummy pronoun
+ used_pronoun_offset = len(used_pronoun)+1
  if result[0].has_key('text'):
  result[0]['text'] = text
  result[0]['tuples'] = filter(lambda x: not (x[1] == used_pronoun or x[2]
  == used_pronoun), result[0]['tuples'])
  result[0]['words'] = result[0]['words'][1:]
+ # account for offset
+ ct = 0
+ for word, av in result[0]['words']:
+ for a,v in av.items():
+ if a.startswith("CharacterOffset"):
+ result[0]['words'][ct][1][a] = v-used_pronoun_offset
+ ct += 1
  return dumps(result)
  else:
  # if there's a timeout error, just return it.
  return dumps(result)
 
+
 if __name__ == '__main__':
  parser = optparse.OptionParser(usage="%prog [OPTIONS]")
  parser.add_option(