2323from syntaxnet import sentence_pb2 , structured_graph_builder
2424from syntaxnet .ops import gen_parser_ops
2525import syntaxnet .load_parser_ops
26- from syntaxnet .conll2tree import to_dict
26+ # from syntaxnet.conll2tree import to_dict
2727
2828input_file_path = os .path .join (PROJECT_ROOT , "data" , "input-file.txt" )
2929output_file_path = os .path .join (PROJECT_ROOT , "data" , "output-file.txt" )
@@ -49,6 +49,92 @@ def _write_input(sentence):
4949 input_file .flush ()
5050input_file .close ()
5151
def as_asciitree(sentence, str_func=None):
    """Render *sentence*'s dependency parse as ASCII art via asciitree.

    Args:
      sentence: a syntaxnet Sentence proto; each token carries word, tag,
        label and head fields, where head == -1 marks a root.
      str_func: optional callable mapping a token to its display string.
        Defaults to ' word [label]'.  (New, backward-compatible parameter —
        the original read an unbound `str_func` name and raised NameError.)

    Returns:
      The string drawing produced by asciitree.draw_tree.
    """
    import asciitree
    from collections import defaultdict

    children = defaultdict(list)
    # Since erased nodes may be missing, multiple tokens may share an
    # index (CCprocessed, etc.), so key tokens by word:label:head instead
    # of by position alone.
    token_to_index = {}
    roots = []
    for i, token in enumerate(sentence.token):
        # BUG FIX: token.head is an int; the original concatenated it
        # straight into a string key and raised TypeError.
        key = '%s:%s:%s' % (token.word, token.label, token.head)
        # `children` is keyed by the 0-based index of the parent token.
        children[token.head].append(token)
        # BUG FIX: the original stored i + 1 here, so child_func looked up
        # token i's children under slot i + 1 and returned the wrong list.
        token_to_index[key] = i
        if token.head == -1:
            roots.append(token)

    assert roots, "Couldn't find root Token(s)"

    # NOTE(review): the original multi-root branch constructed a `Token`
    # that is undefined in this module (NameError at runtime).  Until a
    # synthetic-root representation is agreed on, draw from the first
    # root; additional roots are not rendered — confirm with the caller.
    root = roots[0]

    def child_func(token):
        key = '%s:%s:%s' % (token.word, token.label, token.head)
        return children[token_to_index[key]]

    if str_func is None:
        def str_func(token):
            return ' %s [%s]' % (token.word, token.label)

    return asciitree.draw_tree(root, child_func, str_func)
88+
def to_dict(sentence):
    """Convert a parsed Sentence proto into nested OrderedDicts.

    Each node is rendered as the string 'word tag label @position'
    (1-based position) and maps to an OrderedDict of its dependents, so
    the result can be fed to asciitree.LeftAligned for display.  When the
    parse has several roots, a synthetic blank root ('   @1') is added as
    their common parent.

    Args:
      sentence: a syntaxnet Sentence proto; each token carries word, tag,
        label and head fields, where head == -1 marks a root.

    Returns:
      A collections.OrderedDict representing the dependency tree.
    """
    token_str = []
    children = [[] for _ in sentence.token]
    roots = []
    for i, token in enumerate(sentence.token):
        token_str.append('%s %s %s @%d' %
                         (token.word, token.tag, token.label, i + 1))
        if token.head == -1:
            roots.append(i)
        else:
            children[token.head].append(i)

    assert roots, "Couldnt find roots!!"

    # Guard against cycles in malformed parses: each token is emitted at
    # most once.
    visited = [False] * len(sentence.token)

    def _get_dict(i):
        # Build the subtree rooted at token index i.
        d = collections.OrderedDict()
        for c in children[i]:
            if not visited[c]:
                visited[c] = True
                d[token_str[c]] = _get_dict(c)
        return d

    tree = collections.OrderedDict()
    if len(roots) > 1:
        # BUG FIX: the original inserted a fake label at token_str[0] and
        # reused slot 0 of `children`, which clobbered token 0's real
        # children and shifted every label in the tree off by one.  Build
        # the synthetic root separately so indices stay aligned.
        fake_root = '%s %s %s @%d' % ('', '', '', 1)
        branches = collections.OrderedDict()
        for r in roots:
            visited[r] = True
            branches[token_str[r]] = _get_dict(r)
        tree[fake_root] = branches
    else:
        root = roots[0]
        visited[root] = True
        tree[token_str[root]] = _get_dict(root)
    return tree
137+
52138def pretty_print ():
53139_write_input (_read_output ().strip ())
54140logging .set_verbosity (logging .INFO )
@@ -60,6 +146,9 @@ def pretty_print():
60146while True :
61147documents , finished = sess .run (src )
62148logging .info ('Read %d documents' , len (documents ))
149+ # for d in documents:
150+ # sentence.ParseFromString(d)
151+ # as_asciitree(sentence)
63152for d in documents :
64153sentence .ParseFromString (d )
65154tr = asciitree .LeftAligned ()
0 commit comments