Skip to content

Commit 8b1f421

Browse files
committed
Block to fix UD validation of CorefUD 0.2
This block must be run to fix the trees in CorefUD so that they pass the current UD validator. So far it fixes the following issues: (1) a node whose parent is 0 (the technical root) must have DEPREL=root; (2) there must be a space before newdoc or newpar, i.e. the preceding token must not be marked SpaceAfter=No.
1 parent d7da778 commit 8b1f421

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import logging

from udapi.core.block import Block
2+
3+
class FixToValidate(Block):
    """Fix CorefUD data so that the final documents are valid CoNLL-U files.

    Two problems are repaired:

    * a node attached directly to the tree root must have its ``deprel``
      set to ``"root"``;
    * the last node before a tree that opens a new document or a new
      paragraph must not carry the ``SpaceAfter`` MISC attribute that makes
      ``no_space_after`` true.
    """

    def _set_root_deprel(self, node):
        """Set ``node.deprel`` to ``"root"`` when ``node`` hangs on the tree root.

        Args:
            node: the node to check; it is modified in place when its parent
                is the root of its tree and its deprel is not yet ``"root"``.
        """
        if node.parent == node.root and node.deprel != "root":
            node.deprel = "root"
            # Report through logging instead of print(): stdout may be the
            # stream the converted data is written to.
            logging.info("FixToValidate: deprel set to 'root' for %s", node)

    def _space_before_pardoc(self, doc):
        """Drop ``SpaceAfter`` from the node preceding a new document/paragraph.

        Walks the trees of ``doc`` in order and remembers the last node seen
        so far. Whenever a tree has ``newdoc`` or ``newpar`` set and the
        remembered node reports ``no_space_after``, the ``SpaceAfter`` entry
        is deleted from that node's ``misc``.

        Args:
            doc: the document whose trees are inspected and fixed in place.
        """
        last_node = None
        for tree in doc.trees:
            opens_new_unit = tree.newdoc is not None or tree.newpar is not None
            if opens_new_unit and last_node is not None and last_node.no_space_after:
                del last_node.misc["SpaceAfter"]
                logging.info(
                    "FixToValidate: SpaceAfter removed from %s (precedes %s)",
                    last_node, tree)
            nodes = tree.descendants
            # A tree without any nodes must not crash the run; the previously
            # remembered node is still the last one before the next tree.
            if nodes:
                last_node = nodes[-1]

    def process_node(self, node):
        """Apply the per-node fixes (currently only the root deprel fix)."""
        self._set_root_deprel(node)

    def process_document(self, doc):
        """Run the per-node fixes and then the document-level fixes on ``doc``.

        Args:
            doc: the document to fix in place.
        """
        # Delegate to the base class first: overriding process_document
        # replaces the inherited traversal, so without this call
        # process_node() would never be invoked and the root deprel fix
        # would be silently skipped.
        super().process_document(doc)
        self._space_before_pardoc(doc)

0 commit comments

Comments
 (0)