Skip to content

Commit 09d9cd5

Browse files
authored
Merge pull request #3730 from Zac-HD/regex-alphabet
2 parents a173366 + 0cd8ca9 commit 09d9cd5

File tree

16 files changed

+404
-233
lines changed

16 files changed

+404
-233
lines changed

hypothesis-python/RELEASE.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
RELEASE_TYPE: minor
2+
3+
The :func:`~hypothesis.strategies.from_regex` strategy now takes an optional
4+
``alphabet=characters(codec="utf-8")`` argument for unicode strings, like
5+
:func:`~hypothesis.strategies.text`.
6+
7+
This offers more, and more consistent, control over the generated strings,
8+
removing previously-hard-coded limitations. With ``fullmatch=False`` and
9+
``alphabet=characters()``, surrogate characters are now possible in leading
10+
and trailing text as well as the body of the match. Negated character classes
11+
such as ``[^A-Z]`` or ``\S`` had a hard-coded exclusion of control characters
12+
and surrogate characters; now they permit anything in ``alphabet=`` consistent
13+
with the class, and control characters are permitted by default.

hypothesis-python/docs/changes.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ help narrow down any particularly weird bugs in complex environments.
144144
-------------------
145145

146146
Fixes some lingering issues with inference of recursive types
147-
in `~hypothesis.strategies.from_type`. Closes :issue:`3525`.
147+
in :func:`~hypothesis.strategies.from_type`. Closes :issue:`3525`.
148148

149149
.. _v6.81.0:
150150

@@ -335,8 +335,8 @@ is strongly recommended. You can ensure you have the dependencies with
335335
-------------------
336336

337337
This patch continues the work started in :pull:`3651` by adding
338-
:pypi:`ruff` linter rules for pyflakes, flake8-comprehensions, and
339-
flake8-implicit-str-concat.
338+
:pypi:`ruff` linter rules for :pypi:`pyflakes`, :pypi:`flake8-comprehensions`,
339+
and :pypi:`flake8-implicit-str-concat`.
340340

341341
.. _v6.75.5:
342342

@@ -1184,7 +1184,7 @@ is really annoying. See :issue:`2701` for details.
11841184
6.48.0 - 2022-06-27
11851185
-------------------
11861186

1187-
This release raises :class:`~unittest.SkipTest` for which never executed any
1187+
This release raises :class:`~unittest.SkipTest` for tests which never executed any
11881188
examples, for example because the :obj:`~hypothesis.settings.phases` setting
11891189
excluded the :obj:`~hypothesis.Phase.explicit`, :obj:`~hypothesis.Phase.reuse`,
11901190
and :obj:`~hypothesis.Phase.generate` phases. This helps to avoid cases where

hypothesis-python/src/hypothesis/core.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
from hypothesis.internal.compat import (
7171
PYPY,
7272
BaseExceptionGroup,
73+
add_note,
7374
bad_django_TestCase,
7475
get_type_hints,
7576
int_from_bytes,
@@ -1008,15 +1009,6 @@ def run_engine(self):
10081009
_raise_to_user(errors_to_report, self.settings, report_lines)
10091010

10101011

1011-
def add_note(exc, note):
1012-
try:
1013-
exc.add_note(note)
1014-
except AttributeError:
1015-
if not hasattr(exc, "__notes__"):
1016-
exc.__notes__ = []
1017-
exc.__notes__.append(note)
1018-
1019-
10201012
def _raise_to_user(errors_to_report, settings, target_lines, trailer=""):
10211013
"""Helper function for attaching notes and grouping multiple errors."""
10221014
failing_prefix = "Falsifying example: "

hypothesis-python/src/hypothesis/internal/charmap.py

Lines changed: 13 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from hypothesis.configuration import mkdir_p, storage_directory
2020
from hypothesis.errors import InvalidArgument
21+
from hypothesis.internal.intervalsets import IntervalSet
2122

2223
intervals = Tuple[Tuple[int, int], ...]
2324
cache_type = Dict[Tuple[Tuple[str, ...], int, int, intervals], intervals]
@@ -146,126 +147,6 @@ def as_general_categories(cats, name="cats"):
146147
return tuple(c for c in cs if c in out)
147148

148149

149-
def _union_intervals(x, y):
150-
"""Merge two sequences of intervals into a single tuple of intervals.
151-
152-
Any integer bounded by `x` or `y` is also bounded by the result.
153-
154-
>>> _union_intervals([(3, 10)], [(1, 2), (5, 17)])
155-
((1, 17),)
156-
"""
157-
if not x:
158-
return tuple((u, v) for u, v in y)
159-
if not y:
160-
return tuple((u, v) for u, v in x)
161-
intervals = sorted(x + y, reverse=True)
162-
result = [intervals.pop()]
163-
while intervals:
164-
# 1. intervals is in descending order
165-
# 2. pop() takes from the RHS.
166-
# 3. (a, b) was popped 1st, then (u, v) was popped 2nd
167-
# 4. Therefore: a <= u
168-
# 5. We assume that u <= v and a <= b
169-
# 6. So we need to handle 2 cases of overlap, and one disjoint case
170-
# | u--v | u----v | u--v |
171-
# | a----b | a--b | a--b |
172-
u, v = intervals.pop()
173-
a, b = result[-1]
174-
if u <= b + 1:
175-
# Overlap cases
176-
result[-1] = (a, max(v, b))
177-
else:
178-
# Disjoint case
179-
result.append((u, v))
180-
return tuple(result)
181-
182-
183-
def _subtract_intervals(x, y):
184-
"""Set difference for sequences of intervals. That is, returns a tuple of
185-
intervals that bounds all values bounded by x that are not also bounded by
186-
y. x and y are expected to be in sorted order.
187-
188-
For example _subtract_intervals([(1, 10)], [(2, 3), (9, 15)]) would
189-
return ((1, 1), (4, 8)), removing the values 2, 3, 9 and 10 from the
190-
interval.
191-
"""
192-
if not y:
193-
return tuple(x)
194-
x = list(map(list, x))
195-
i = 0
196-
j = 0
197-
result = []
198-
while i < len(x) and j < len(y):
199-
# Iterate in parallel over x and y. j stays pointing at the smallest
200-
# interval in the right hand side (y) that could still overlap with some
201-
# element of x at index >= i.
202-
# Similarly, i is not incremented until we know that it does not
203-
# overlap with any element of y at index >= j.
204-
205-
xl, xr = x[i]
206-
assert xl <= xr
207-
yl, yr = y[j]
208-
assert yl <= yr
209-
210-
if yr < xl:
211-
# The interval at y[j] is strictly to the left of the interval at
212-
# x[i], so will not overlap with it or any later interval of x.
213-
j += 1
214-
elif yl > xr:
215-
# The interval at y[j] is strictly to the right of the interval at
216-
# x[i], so all of x[i] goes into the result as no further intervals
217-
# in y will intersect it.
218-
result.append(x[i])
219-
i += 1
220-
elif yl <= xl:
221-
if yr >= xr:
222-
# x[i] is contained entirely in y[j], so we just skip over it
223-
# without adding it to the result.
224-
i += 1
225-
else:
226-
# The beginning of x[i] is contained in y[j], so we update the
227-
# left endpoint of x[i] to remove this, and increment j as we
228-
# now have moved past it. Note that this is not added to the
229-
# result as is, as more intervals from y may intersect it so it
230-
# may need updating further.
231-
x[i][0] = yr + 1
232-
j += 1
233-
else:
234-
# yl > xl, so the left hand part of x[i] is not contained in y[j],
235-
# so there are some values we should add to the result.
236-
result.append((xl, yl - 1))
237-
238-
if yr + 1 <= xr:
239-
# If y[j] finishes before x[i] does, there may be some values
240-
# in x[i] left that should go in the result (or they may be
241-
# removed by a later interval in y), so we update x[i] to
242-
# reflect that and increment j because it no longer overlaps
243-
# with any remaining element of x.
244-
x[i][0] = yr + 1
245-
j += 1
246-
else:
247-
# Every element of x[i] other than the initial part we have
248-
# already added is contained in y[j], so we move to the next
249-
# interval.
250-
i += 1
251-
# Any remaining intervals in x do not overlap with any of y, as if they did
252-
# we would not have incremented j to the end, so can be added to the result
253-
# as they are.
254-
result.extend(x[i:])
255-
return tuple(map(tuple, result))
256-
257-
258-
def _intervals(s):
259-
"""Return a tuple of intervals, covering the codepoints of characters in
260-
`s`.
261-
262-
>>> _intervals('abcdef0123456789')
263-
((48, 57), (97, 102))
264-
"""
265-
intervals = tuple((ord(c), ord(c)) for c in sorted(s))
266-
return _union_intervals(intervals, intervals)
267-
268-
269150
category_index_cache = {(): ()}
270151

271152

@@ -306,11 +187,14 @@ def _query_for_key(key):
306187
pass
307188
assert key
308189
if set(key) == set(categories()):
309-
result = ((0, sys.maxunicode),)
190+
result = IntervalSet([(0, sys.maxunicode)])
310191
else:
311-
result = _union_intervals(_query_for_key(key[:-1]), charmap()[key[-1]])
312-
category_index_cache[key] = result
313-
return result
192+
result = IntervalSet(_query_for_key(key[:-1])).union(
193+
IntervalSet(charmap()[key[-1]])
194+
)
195+
assert isinstance(result, IntervalSet)
196+
category_index_cache[key] = result.intervals
197+
return result.intervals
314198

315199

316200
limited_category_index_cache: cache_type = {}
@@ -344,14 +228,14 @@ def query(
344228
if max_codepoint is None:
345229
max_codepoint = sys.maxunicode
346230
catkey = _category_key(exclude_categories, include_categories)
347-
character_intervals = _intervals(include_characters or "")
348-
exclude_intervals = _intervals(exclude_characters or "")
231+
character_intervals = IntervalSet.from_string(include_characters or "")
232+
exclude_intervals = IntervalSet.from_string(exclude_characters or "")
349233
qkey = (
350234
catkey,
351235
min_codepoint,
352236
max_codepoint,
353-
character_intervals,
354-
exclude_intervals,
237+
character_intervals.intervals,
238+
exclude_intervals.intervals,
355239
)
356240
try:
357241
return limited_category_index_cache[qkey]
@@ -362,8 +246,6 @@ def query(
362246
for u, v in base:
363247
if v >= min_codepoint and u <= max_codepoint:
364248
result.append((max(u, min_codepoint), min(v, max_codepoint)))
365-
result = tuple(result)
366-
result = _union_intervals(result, character_intervals)
367-
result = _subtract_intervals(result, exclude_intervals)
249+
result = (IntervalSet(result) | character_intervals) - exclude_intervals
368250
limited_category_index_cache[qkey] = result
369251
return result

hypothesis-python/src/hypothesis/internal/compat.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@
4343
WINDOWS = platform.system() == "Windows"
4444

4545

46+
def add_note(exc, note):
47+
try:
48+
exc.add_note(note)
49+
except AttributeError:
50+
if not hasattr(exc, "__notes__"):
51+
exc.__notes__ = []
52+
exc.__notes__.append(note)
53+
54+
4655
def escape_unicode_characters(s: str) -> str:
4756
return codecs.encode(s, "unicode_escape").decode("ascii")
4857

0 commit comments

Comments
 (0)