udapi · martinpopel · Feb 9, 2021 · Feb 3, 2021 · Feb 3, 2021 · Feb 4, 2021
diff --git a/.travis.yml b/.travis.yml
@@ -1,9 +1,9 @@
 language: python
 python:
- - "3.4"
- - "3.5"
  - "3.6"
  - "3.7"
+ - "3.8"
+ - "3.9"
 before_install:
  - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
  - sudo apt-get update -qq

diff --git a/CHANGES.txt b/CHANGES.txt
@@ -2,10 +2,14 @@ Udapi Change Log
 ----------------
 See https://github.com/udapi/udapi-python/commits/master for details.
 
+0.2.3 2021-02-04
+ - support for enhanced dependencies and coreference
+ - requires Python 3.6+ due to f-strings
+
 0.2.2 2018-01-08
  - support for loading/storing documents from/to strings
  - allow private modules (starting with dot instead of udapi.block)
  - MorphoDiTa wrapper udapi/tool/morphodita.py
  - root.sent_id returns always the same as root.address()
 
-0.2.1 2017-10-23 the first PyPI release
+0.2.1 2017-10-23 the first PyPI release
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@ Python framework for processing Universal Dependencies data
 [![Documentation Status](https://readthedocs.org/projects/udapi/badge/)](http://udapi.readthedocs.io)
 
 ## Requirements
-- You need Python 3.3 or higher.
+- You need Python 3.6 or higher.
 - If the [ufal.udpipe](https://pypi.python.org/pypi/ufal.udpipe/) parser is needed,
  make sure you have a C++11 compiler (e.g. [g++ 4.7 or newer](.travis.yml#L9))
  and install UDPipe with `pip3 install --user --upgrade ufal.udpipe`.

diff --git a/setup.py b/setup.py
@@ -2,15 +2,9 @@
 
 from setuptools import setup, find_packages
 
-# python_requires is supported by pip only from November 2016,
-# so let's check the Python version also the old way.
-import sys
-if sys.version_info < (3, 3):
- raise SystemExit('Udapi requires Python 3.3 or higher.')
-
 setup(
  name='udapi',
- version='0.2.2',
+ version='0.2.3',
  description='Python framework for processing Universal Dependencies data',
  long_description=(
  'Udapi is an open-source framework providing API for processing '
@@ -27,7 +21,7 @@
  scripts=['bin/udapy'],
  tests_require=['pytest'],
  install_requires=['colorama', 'termcolor'],
- python_requires='>=3.3',
+ python_requires='>=3.6',
  license='GPL 2 or newer',
  platforms='any',
 )
diff --git a/udapi/block/demo/complexity.py b/udapi/block/demo/complexity.py
@@ -144,15 +144,15 @@ def expand_subtree(self, nodes, expand_type):
  #for child in group.children:
  #if child.udeprel != 'conj':
  #result.extend(child.descendants(add_self=True))
- #return = sorted(result, key=lambda n: n.ord)
+ #return = sorted(result)
  if expand_type == 'subtree_within_clause':
  stack = [n for n in nodes[0].children if n.udeprel != 'conj']
  while stack:
  node = stack.pop()
  if not node.misc["ClauseHead"]:
  nodes.append(node)
  stack.extend(node.children())
- return sorted(nodes, key=lambda n: n.ord)
+ return sorted(nodes)
  raise ValueError("unknown expand value " + expand_type)
 
 
@@ -182,7 +182,7 @@ def get_coord_phrase(self, root, phrase_type_function):
  for conj in conjuncts:
  # TODO multiword conjunctions (udeprel=flat)?
  conjunctions.extend([n for n in conj.children if n.udeprel == 'cc'])
- results.append(sorted([node] + conjuncts + conjunctions, key=lambda n: n.ord))
+ results.append(sorted([node] + conjuncts + conjunctions))
  return results
 
  # TODO koordinace hlavních i vedlejších vět
@@ -199,7 +199,7 @@ def get_t_units(self, main_heads):
  else:
  main_clause.append(node)
  stack.extend(node.children)
- main_clause = sorted(main_clause, key=lambda n: n.ord)
+ main_clause = sorted(main_clause)
 
  for dep_clause_head in dep_heads:
  results.append(main_clause + self.expand_subtree([dep_clause_head], 'subtree'))

diff --git a/udapi/block/write/conllu.py b/udapi/block/write/conllu.py
@@ -56,7 +56,8 @@ def process_tree(self, tree): # pylint: disable=too-many-branches
  if next_empty_ord > last_ord:
  break
  empty = empty_nodes.pop(0)
- values = [str(getattr(empty, a)) for a in self.node_attributes]
+ values = [getattr(empty, attr_name) for attr_name in self.node_attributes]
+ values = ['_' if v is None else str(v) for v in values]
  values[6] = '_'
  values[7] = '_'
  print('\t'.join(values))

diff --git a/udapi/block/write/textmodetrees.py b/udapi/block/write/textmodetrees.py
@@ -132,7 +132,7 @@ class TextModeTrees(BaseWriter):
 
  def __init__(self, print_sent_id=True, print_text=True, add_empty_line=True, indent=1,
  minimize_cross=True, color='auto', attributes='form,upos,deprel',
- print_undef_as='_', print_doc_meta=True, print_comments=False,
+ print_undef_as='_', print_doc_meta=True, print_comments=False, print_empty=True,
  mark='ToDo|ToDoOrigText|Bug|Mark', marked_only=False, hints=True,
  layout='classic', **kwargs):
  """Create new TextModeTrees block object.
@@ -156,6 +156,7 @@ def __init__(self, print_sent_id=True, print_text=True, add_empty_line=True, ind
  print_undef_as: What should be printed instead of undefined attribute values (if any)?
  print_doc_meta: Print `document.meta` metadata before each document?
  print_comments: Print comments (other than sent_id and text)?
+ print_empty: Print empty nodes?
  mark: a regex. If `re.search(mark + '=', str(node.misc))` the node is highlighted.
  If `print_comments and re.search(r'^ (%s) = ' % mark, root.comment, re.M)`
  the comment is highlighted.
@@ -178,6 +179,7 @@ def __init__(self, print_sent_id=True, print_text=True, add_empty_line=True, ind
  self.print_undef_as = print_undef_as
  self.print_doc_meta = print_doc_meta
  self.print_comments = print_comments
+ self.print_empty = print_empty
  self.mark = mark
  self.marked_only = marked_only
  self.layout = layout
@@ -222,20 +224,23 @@ def _compute_gaps(self, node):
  self._gaps[node.ord] = rmost - lmost - descs
  return lmost, rmost, descs + 1
 
- def should_print_tree(self, root):
+ def should_print_tree(self, root, allnodes):
  """Should this tree be printed?"""
  if not self.marked_only:
  return True
- if any(self.is_marked(n) for n in root.descendants(add_self=1)):
+ if any(self.is_marked(n) for n in allnodes):
  return True
  if not self.print_comments or root.comment is None or self.mark_re is None:
  return False
  return self.comment_mark_re.search(root.comment)
 
  def process_tree(self, root):
  """Print the tree to (possibly redirected) sys.stdout."""
- allnodes = root.descendants(add_self=1)
- if not self.should_print_tree(root):
+ if self.print_empty:
+ allnodes = [root] + root.descendants_and_empty
+ else:
+ allnodes = root.descendants(add_self=1)
+ if not self.should_print_tree(root, allnodes):
  return
  self._index_of = {allnodes[i].ord: i for i in range(len(allnodes))}
  self.lines = [''] * len(allnodes)
@@ -281,7 +286,11 @@ def process_tree(self, root):
  if self.minimize_cross:
  stack = sorted(stack, key=lambda x: -self._gaps[x.ord])
 
- if self.layout != 'classic':
+ if self.layout == 'classic':
+ for idx, node in enumerate(allnodes):
+ if node.is_empty():
+ self.add_node(idx, node)
+ else:
  columns_attrs = [[a] for a in self.attrs] if self.layout == 'align' else [self.attrs]
  for col_attrs in columns_attrs:
  self.attrs = col_attrs

diff --git a/udapi/core/basewriter.py b/udapi/core/basewriter.py
@@ -2,6 +2,7 @@
 import sys
 import logging
 
+import udapi.core.coref
 from udapi.core.block import Block
 from udapi.core.files import Files
 
@@ -39,6 +40,7 @@ def next_filename(self):
  return self.files.next_filename()
 
  def before_process_document(self, document):
+ udapi.core.coref.store_coref_to_misc(document)
  if self.orig_files == '<filehandle>':
  logging.info('Writing to filehandle.')
  sys.stdout = self.files.filehandle

diff --git a/udapi/core/block.py b/udapi/core/block.py
@@ -29,7 +29,7 @@ def process_node(self, _):
 
  def process_tree(self, tree):
  """Process a UD tree"""
- for node in tree.descendants:
+ for node in tree._descendants:
  self.process_node(node)
 
  def process_bundle(self, bundle):