Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c456735
basic support for coreference
martinpopel Feb 3, 2021
bf25a8a
bump to 0.2.3
martinpopel Feb 3, 2021
156b66e
create_mention improvements
martinpopel Feb 4, 2021
5086ca0
report span correctly, e.g. "3-5,6" if there is 5.1 which is not part…
martinpopel Feb 4, 2021
75e59cf
cluster_mention.head = new_head
martinpopel Feb 4, 2021
64264ec
rename misc["Split"] to misc["SplitAnte"]
martinpopel Feb 5, 2021
e82ff0e
more pythonic
martinpopel Feb 5, 2021
459d4ec
create_empty_child() creates EmptyNode
martinpopel Feb 5, 2021
82a6d2d
fix write.Conllu, so that None attributes are converted to _
martinpopel Feb 5, 2021
dc09ee5
store ord in node._ord internally
martinpopel Feb 5, 2021
e339642
class OrdTuple, so that 1.9 < OrdTuple('1.10')
martinpopel Feb 5, 2021
6dbc65c
mention_words can be an empty string at any time
martinpopel Feb 5, 2021
1d88f03
overload __lt__, so we can use node1 < node2
martinpopel Feb 5, 2021
87ac420
each node now stores an explicit reference to the root (node._root)
martinpopel Feb 6, 2021
27b8be7
keep mention.word always sorted, better error msg when parsing span
martinpopel Feb 6, 2021
a0f6775
bug fix in parsing spans
martinpopel Feb 6, 2021
76ba743
support for ClusterId[1]
martinpopel Feb 6, 2021
ac3f69e
bug fixes: node.ord is never a string now, it can be int, float or Or…
martinpopel Feb 7, 2021
542707f
draft of ordering of CorefMention objects
martinpopel Feb 7, 2021
b27a153
write.TextModeTrees prints empty nodes by default
martinpopel Feb 8, 2021
5f14970
18% faster loading
martinpopel Feb 8, 2021
44c291b
huge speedup: "NewTreex" benchmark 40s -> 25s
martinpopel Feb 9, 2021
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
language: python
python:
- "3.4"
- "3.5"
- "3.6"
- "3.7"
- "3.8"
- "3.9"
before_install:
- sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
- sudo apt-get update -qq
Expand Down
6 changes: 5 additions & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@ Udapi Change Log
----------------
See https://github.com/udapi/udapi-python/commits/master for details.

0.2.3 2021-02-04
- support for enhanced dependencies and coreference
- requires Python 3.6+ due to f-strings

0.2.2 2018-01-08
- support for loading/storing documents from/to strings
- allow private modules (starting with dot instead of udapi.block)
- MorphoDiTa wrapper udapi/tool/morphodita.py
- root.sent_id always returns the same as root.address()

0.2.1 2017-10-23 the first PyPI release
0.2.1 2017-10-23 the first PyPI release
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Python framework for processing Universal Dependencies data
[![Documentation Status](https://readthedocs.org/projects/udapi/badge/)](http://udapi.readthedocs.io)

## Requirements
- You need Python 3.3 or higher.
- You need Python 3.6 or higher.
- If the [ufal.udpipe](https://pypi.python.org/pypi/ufal.udpipe/) parser is needed,
make sure you have a C++11 compiler (e.g. [g++ 4.7 or newer](.travis.yml#L9))
and install UDPipe with `pip3 install --user --upgrade ufal.udpipe`.
Expand Down
10 changes: 2 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,9 @@

from setuptools import setup, find_packages

# python_requires is supported by pip only from November 2016,
# so let's check the Python version also the old way.
import sys
if sys.version_info < (3, 3):
raise SystemExit('Udapi requires Python 3.3 or higher.')

setup(
name='udapi',
version='0.2.2',
version='0.2.3',
description='Python framework for processing Universal Dependencies data',
long_description=(
'Udapi is an open-source framework providing API for processing '
Expand All @@ -27,7 +21,7 @@
scripts=['bin/udapy'],
tests_require=['pytest'],
install_requires=['colorama', 'termcolor'],
python_requires='>=3.3',
python_requires='>=3.6',
license='GPL 2 or newer',
platforms='any',
)
8 changes: 4 additions & 4 deletions udapi/block/demo/complexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,15 +144,15 @@ def expand_subtree(self, nodes, expand_type):
#for child in group.children:
#if child.udeprel != 'conj':
#result.extend(child.descendants(add_self=True))
#return = sorted(result, key=lambda n: n.ord)
#return = sorted(result)
if expand_type == 'subtree_within_clause':
stack = [n for n in nodes[0].children if n.udeprel != 'conj']
while stack:
node = stack.pop()
if not node.misc["ClauseHead"]:
nodes.append(node)
stack.extend(node.children())
return sorted(nodes, key=lambda n: n.ord)
return sorted(nodes)
raise ValueError("unknown expand value " + expand_type)


Expand Down Expand Up @@ -182,7 +182,7 @@ def get_coord_phrase(self, root, phrase_type_function):
for conj in conjuncts:
# TODO multiword conjunctions (udeprel=flat)?
conjunctions.extend([n for n in conj.children if n.udeprel == 'cc'])
results.append(sorted([node] + conjuncts + conjunctions, key=lambda n: n.ord))
results.append(sorted([node] + conjuncts + conjunctions))
return results

# TODO coordination of both main and subordinate clauses
Expand All @@ -199,7 +199,7 @@ def get_t_units(self, main_heads):
else:
main_clause.append(node)
stack.extend(node.children)
main_clause = sorted(main_clause, key=lambda n: n.ord)
main_clause = sorted(main_clause)

for dep_clause_head in dep_heads:
results.append(main_clause + self.expand_subtree([dep_clause_head], 'subtree'))
Expand Down
3 changes: 2 additions & 1 deletion udapi/block/write/conllu.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ def process_tree(self, tree): # pylint: disable=too-many-branches
if next_empty_ord > last_ord:
break
empty = empty_nodes.pop(0)
values = [str(getattr(empty, a)) for a in self.node_attributes]
values = [getattr(empty, attr_name) for attr_name in self.node_attributes]
values = ['_' if v is None else str(v) for v in values]
values[6] = '_'
values[7] = '_'
print('\t'.join(values))
Expand Down
21 changes: 15 additions & 6 deletions udapi/block/write/textmodetrees.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ class TextModeTrees(BaseWriter):

def __init__(self, print_sent_id=True, print_text=True, add_empty_line=True, indent=1,
minimize_cross=True, color='auto', attributes='form,upos,deprel',
print_undef_as='_', print_doc_meta=True, print_comments=False,
print_undef_as='_', print_doc_meta=True, print_comments=False, print_empty=True,
mark='ToDo|ToDoOrigText|Bug|Mark', marked_only=False, hints=True,
layout='classic', **kwargs):
"""Create new TextModeTrees block object.
Expand All @@ -156,6 +156,7 @@ def __init__(self, print_sent_id=True, print_text=True, add_empty_line=True, ind
print_undef_as: What should be printed instead of undefined attribute values (if any)?
print_doc_meta: Print `document.meta` metadata before each document?
print_comments: Print comments (other than sent_id and text)?
print_empty: Print empty nodes?
mark: a regex. If `re.search(mark + '=', str(node.misc))` the node is highlighted.
If `print_comments and re.search(r'^ (%s) = ' % mark, root.comment, re.M)`
the comment is highlighted.
Expand All @@ -178,6 +179,7 @@ def __init__(self, print_sent_id=True, print_text=True, add_empty_line=True, ind
self.print_undef_as = print_undef_as
self.print_doc_meta = print_doc_meta
self.print_comments = print_comments
self.print_empty = print_empty
self.mark = mark
self.marked_only = marked_only
self.layout = layout
Expand Down Expand Up @@ -222,20 +224,23 @@ def _compute_gaps(self, node):
self._gaps[node.ord] = rmost - lmost - descs
return lmost, rmost, descs + 1

def should_print_tree(self, root):
def should_print_tree(self, root, allnodes):
"""Should this tree be printed?"""
if not self.marked_only:
return True
if any(self.is_marked(n) for n in root.descendants(add_self=1)):
if any(self.is_marked(n) for n in allnodes):
return True
if not self.print_comments or root.comment is None or self.mark_re is None:
return False
return self.comment_mark_re.search(root.comment)

def process_tree(self, root):
"""Print the tree to (possibly redirected) sys.stdout."""
allnodes = root.descendants(add_self=1)
if not self.should_print_tree(root):
if self.print_empty:
allnodes = [root] + root.descendants_and_empty
else:
allnodes = root.descendants(add_self=1)
if not self.should_print_tree(root, allnodes):
return
self._index_of = {allnodes[i].ord: i for i in range(len(allnodes))}
self.lines = [''] * len(allnodes)
Expand Down Expand Up @@ -281,7 +286,11 @@ def process_tree(self, root):
if self.minimize_cross:
stack = sorted(stack, key=lambda x: -self._gaps[x.ord])

if self.layout != 'classic':
if self.layout == 'classic':
for idx, node in enumerate(allnodes):
if node.is_empty():
self.add_node(idx, node)
else:
columns_attrs = [[a] for a in self.attrs] if self.layout == 'align' else [self.attrs]
for col_attrs in columns_attrs:
self.attrs = col_attrs
Expand Down
2 changes: 2 additions & 0 deletions udapi/core/basewriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import logging

import udapi.core.coref
from udapi.core.block import Block
from udapi.core.files import Files

Expand Down Expand Up @@ -39,6 +40,7 @@ def next_filename(self):
return self.files.next_filename()

def before_process_document(self, document):
udapi.core.coref.store_coref_to_misc(document)
if self.orig_files == '<filehandle>':
logging.info('Writing to filehandle.')
sys.stdout = self.files.filehandle
Expand Down
2 changes: 1 addition & 1 deletion udapi/core/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def process_node(self, _):

def process_tree(self, tree):
"""Process a UD tree"""
for node in tree.descendants:
for node in tree._descendants:
self.process_node(node)

def process_bundle(self, bundle):
Expand Down
Loading