Skip to content
88 changes: 88 additions & 0 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import sys
import unittest
import textwrap

from test.support.script_helper import assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
Expand All @@ -28,6 +29,41 @@ def get_header(self, data):
headers[key] = val.strip()
return headers

def get_msgids(self, data):
    """Return every msgid found in .po/.pot text as a list of strings.

    *data* is the complete text of the catalog.  Each ``msgid`` entry
    produces one item in the returned list, in file order.  A msgid that
    spans several lines (the ``msgid`` line followed by ``"..."``
    continuation lines) is returned as its unquoted fragments joined with
    newlines.  Escape sequences inside the fragments are left untouched.
    """
    def unquote(fragment):
        # Remove exactly one enclosing pair of double quotes, so that an
        # escaped quote at the very end of the text (\") survives.
        if len(fragment) >= 2 and fragment[0] == fragment[-1] == '"':
            return fragment[1:-1]
        # Unterminated or otherwise odd fragment: fall back to stripping
        # whatever quotes are present at either end.
        return fragment.strip('"')

    msgids = []
    cur_msgid = None  # fragments of the msgid being read, or None
    for line in data.split('\n'):
        if cur_msgid is not None:
            if line.startswith('"'):
                cur_msgid.append(unquote(line))
                continue
            # Any other line terminates the current msgid.
            msgids.append('\n'.join(cur_msgid))
            cur_msgid = None
            # Fall through on purpose: the terminating line may itself
            # be the start of the next msgid.
        if line.startswith('msgid '):
            cur_msgid = [unquote(line[len('msgid '):])]

    # The data may end while a msgid is still being collected.
    if cur_msgid is not None:
        msgids.append('\n'.join(cur_msgid))

    return msgids

def extract_docstrings_from_str(self, module_content):
    """Return the msgids the script extracts from *module_content*.

    The text is written to ``test_docstrings.py`` inside the working
    directory set up by ``temp_cwd(None)``, ``self.script`` is run on that
    file with the ``-D`` option, and the resulting ``messages.pot`` is
    parsed with ``get_msgids``.
    """
    source_name = 'test_docstrings.py'
    with temp_cwd(None):
        with open(source_name, 'w') as source_file:
            source_file.write(module_content)
        # assert_python_ok fails the test if the script exits abnormally.
        assert_python_ok(self.script, '-D', source_name)
        with open('messages.pot') as pot_file:
            pot_text = pot_file.read()
    return self.get_msgids(pot_text)

def test_header(self):
"""Make sure the required fields are in the header, according to:
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
Expand Down Expand Up @@ -72,3 +108,55 @@ def test_POT_Creation_Date(self):

# This will raise if the date format does not exactly match.
datetime.strptime(creationDate, '%Y-%m-%d %H:%M%z')

def test_funcdocstring_annotated_args(self):
    """The docstring of a function with annotated arguments is extracted."""
    source = textwrap.dedent('''\
    def foo(bar: str):
        """doc"""
    ''')
    extracted = self.extract_docstrings_from_str(source)
    self.assertIn('doc', extracted)

def test_funcdocstring_annotated_return(self):
    """The docstring of a function with a return annotation is extracted."""
    source = textwrap.dedent('''\
    def foo(bar) -> str:
        """doc"""
    ''')
    extracted = self.extract_docstrings_from_str(source)
    self.assertIn('doc', extracted)

def test_funcdocstring_defvalue_args(self):
    """The docstring of a function with default argument values is extracted."""
    source = textwrap.dedent('''\
    def foo(bar=()):
        """doc"""
    ''')
    extracted = self.extract_docstrings_from_str(source)
    self.assertIn('doc', extracted)

def test_funcdocstring_multiple_funcs(self):
    """Docstrings are extracted from several functions in one module.

    The functions combine annotated arguments, annotated return types and
    default argument values, each of which puts colons inside the header.
    """
    source = textwrap.dedent('''\
    def foo1(bar: tuple=()) -> str:
        """doc1"""

    def foo2(bar: List[1:2]) -> (lambda x: x):
        """doc2"""

    def foo3(bar: 'func'=lambda x: x) -> {1: 2}:
        """doc3"""
    ''')
    extracted = self.extract_docstrings_from_str(source)
    # Checked in definition order; the first missing docstring fails the test.
    for expected in ('doc1', 'doc2', 'doc3'):
        self.assertIn(expected, extracted)

def test_classdocstring_early_colon(self):
    """A class docstring is extracted when colons occur inside the
    parentheses of the class header.
    """
    source = textwrap.dedent('''\
    class D(L[1:2], F({1: 2}), metaclass=M(lambda x: x)):
        """doc"""
    ''')
    extracted = self.extract_docstrings_from_str(source)
    self.assertIn('doc', extracted)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix pygettext not extracting docstrings for functions with type-annotated
arguments.
15 changes: 11 additions & 4 deletions Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ def __init__(self, options):
self.__lineno = -1
self.__freshmodule = 1
self.__curfile = None
self.__enclosurecount = 0

def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
Expand All @@ -340,17 +341,23 @@ def __waiting(self, ttype, tstring, lineno):
elif ttype not in (tokenize.COMMENT, tokenize.NL):
self.__freshmodule = 0
return
# class docstring?
# class or func/method docstring?
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__suiteseen
return
if ttype == tokenize.NAME and tstring in opts.keywords:
self.__state = self.__keywordseen

def __suiteseen(self, ttype, tstring, lineno):
    """Consume the tokens of a ``class``/``def`` header up to its final colon.

    This state is entered after a ``class`` or ``def`` keyword.  Colons that
    appear inside ``()``, ``[]`` or ``{}`` (annotations, slices, dict
    displays, lambdas used as defaults, ...) do not end the header, so the
    nesting depth is tracked in ``self.__enclosurecount``.  The first colon
    seen at depth zero switches the state to ``__suitedocstring``.

    *ttype* and *tstring* are the token type and token text; *lineno* is
    accepted for the common state-handler signature and is not used here.
    """
    # Only operator tokens can open/close an enclosure or end the header.
    if ttype != tokenize.OP:
        return
    # Compare against whole tokens rather than using a substring test.
    if tstring in ('(', '[', '{'):
        self.__enclosurecount += 1
    elif tstring in (')', ']', '}'):
        self.__enclosurecount -= 1
    elif tstring == ':' and self.__enclosurecount == 0:
        # A colon outside every enclosure: end of the class/def header.
        self.__state = self.__suitedocstring

def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
Expand Down