@@ -93,7 +93,69 @@ def parse_node_line(self, line, root, nodes, parents, mwts):
9393 setattr (node , 'ord' , int (fields [n_attribute ]))
9494 elif attribute_name == 'deps' :
9595 setattr (node , 'raw_deps' , fields [n_attribute ])
96- elif attribute_name != '_' :
96+ elif attribute_name != '_' and fields [ n_attribute ] != '_' :
9797 setattr (node , attribute_name , fields [n_attribute ])
9898
9999 nodes .append (node )
100+
101+ # Acknowledged code duplication with read.Conllu
def read_tree_from_lines(self, lines):
    """Build one tree from the lines belonging to a single sentence.

    Every line whose first character is ``#`` is handed to
    ``self.parse_comment_line``; every other line is handed to
    ``self.parse_node_line`` together with the ``nodes``, ``parents`` and
    ``mwts`` lists, which that method is expected to fill. Once all nodes
    exist, each node is attached to its parent and the multi-word tokens
    are created.

    Args:
        lines: iterable of non-empty strings (the ``line[0]`` check raises
            IndexError on an empty string).

    Returns:
        The root of the new tree, or None if no node line was read.

    Raises:
        ValueError: if a HEAD index does not refer to a node of this tree,
            or if a cycle is detected and ``self.fix_cycles`` is false.
    """
    root = Root()
    nodes = [root]
    parents = [0]
    mwts = []
    for line in lines:
        if line[0] == '#':
            self.parse_comment_line(line, root)
        else:
            self.parse_node_line(line, root, nodes, parents, mwts)

    # If no nodes were read (so only root remained in nodes),
    # we return None as a sign of failure (end of file or more than one empty line).
    if len(nodes) == 1:
        return None

    # Empty sentences are not allowed in CoNLL-U,
    # but if the users want to save just the sentence string and/or sent_id
    # they need to create one artificial node and mark it with Empty=Yes.
    # In that case, we will delete this node, so the tree will have just the (technical) root.
    # See also udapi.block.write.Conllu, which is compatible with this trick.
    if len(nodes) == 2 and str(nodes[1].misc) == 'Empty=Yes':
        nodes.pop()
        root._children = []
        root._descendants = []

    # Set dependency parents (now, all nodes of the tree are created).
    for node_ord, node in enumerate(nodes[1:], 1):
        head = parents[node_ord]
        # Check the range explicitly: a negative HEAD would otherwise wrap
        # around via negative indexing and silently pick a wrong parent.
        if not 0 <= head < len(nodes):
            raise ValueError("Node %s HEAD is out of range (%d)" % (node, head))
        parent = nodes[head]

        if node is parent:
            if not self.fix_cycles:
                raise ValueError(f"Detected a cycle: {node} attached to itself")
            logging.warning("Ignoring a cycle (attaching to the root instead):\n %s", node)
            # Only rebind the parent here; the single attachment below does
            # the rest. Attaching in this branch and then falling through
            # would re-attach the node to itself and undo the fix.
            parent = root
        elif node.children:
            # The node already has children, so its new parent could be one
            # of its own descendants. Walk up from the parent to find out.
            climbing = parent._parent
            while climbing:
                if climbing is node:
                    if not self.fix_cycles:
                        raise ValueError(f"Detected a cycle: {node}")
                    logging.warning("Ignoring a cycle (attaching to the root instead):\n %s", parent)
                    parent = root
                    break
                climbing = climbing._parent

        node._parent = parent
        parent._children.append(node)

    # Create multi-word tokens.
    for fields in mwts:
        range_start, range_end = fields[0].split('-')
        words = nodes[int(range_start):int(range_end) + 1]
        root.create_multiword_token(words, form=fields[1], misc=fields[-1])

    return root
0 commit comments