HypothesisWorks
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
Lines changed: 13 additions & 0 deletions
diff --git a/hypothesis-python/docs/changes.rst b/hypothesis-python/docs/changes.rst
Lines changed: 4 additions & 4 deletions
diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
Lines changed: 1 addition & 9 deletions
diff --git a/hypothesis-python/src/hypothesis/internal/charmap.py b/hypothesis-python/src/hypothesis/internal/charmap.py
Lines changed: 13 additions & 131 deletions
diff --git a/hypothesis-python/src/hypothesis/internal/compat.py b/hypothesis-python/src/hypothesis/internal/compat.py
Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,13 @@
+RELEASE_TYPE: minor
+
+The :func:`~hypothesis.strategies.from_regex` strategy now takes an optional
+``alphabet=characters(codec="utf-8")`` argument for unicode strings, like
+:func:`~hypothesis.strategies.text`.
+
+This offers greater and more consistent control over the generated strings,
+removing previously hard-coded limitations. With ``fullmatch=False`` and
+``alphabet=characters()``, surrogate characters are now possible in leading
+and trailing text as well as the body of the match. Negated character classes
+such as ``[^A-Z]`` or ``\S`` had a hard-coded exclusion of control characters
+and surrogate characters; now they permit anything in ``alphabet=`` consistent
+with the class, and control characters are permitted by default.
@@ -144,7 +144,7 @@ help narrow down any particularly weird bugs in complex environments.
 -------------------
 
 Fixes some lingering issues with inference of recursive types
-in `~hypothesis.strategies.from_type`. Closes :issue:`3525`.
+in :func:`~hypothesis.strategies.from_type`. Closes :issue:`3525`.
 
 .. _v6.81.0:
 
@@ -335,8 +335,8 @@ is strongly recommended. You can ensure you have the dependencies with
 -------------------
 
 This patch continues the work started in :pull:`3651` by adding
-:pypi:`ruff` linter rules for pyflakes, flake8-comprehensions, and
-flake8-implicit-str-concat.
+:pypi:`ruff` linter rules for :pypi:`pyflakes`, :pypi:`flake8-comprehensions`,
+and :pypi:`flake8-implicit-str-concat`.
 
 .. _v6.75.5:
 
@@ -1184,7 +1184,7 @@ is really annoying. See :issue:`2701` for details.
 6.48.0 - 2022-06-27
 -------------------
 
-This release raises :class:`~unittest.SkipTest` for which never executed any
+This release raises :class:`~unittest.SkipTest` for tests which never executed any
 examples, for example because the :obj:`~hypothesis.settings.phases` setting
 excluded the :obj:`~hypothesis.Phase.explicit`, :obj:`~hypothesis.Phase.reuse`,
 and :obj:`~hypothesis.Phase.generate` phases. This helps to avoid cases where
 
@@ -70,6 +70,7 @@
 from hypothesis.internal.compat import (
  PYPY,
  BaseExceptionGroup,
+ add_note,
  bad_django_TestCase,
  get_type_hints,
  int_from_bytes,
@@ -1008,15 +1009,6 @@ def run_engine(self):
  _raise_to_user(errors_to_report, self.settings, report_lines)
 
 
-def add_note(exc, note):
- try:
- exc.add_note(note)
- except AttributeError:
- if not hasattr(exc, "__notes__"):
- exc.__notes__ = []
- exc.__notes__.append(note)
-
-
 def _raise_to_user(errors_to_report, settings, target_lines, trailer=""):
  """Helper function for attaching notes and grouping multiple errors."""
  failing_prefix = "Falsifying example: "
 
@@ -18,6 +18,7 @@
 
 from hypothesis.configuration import mkdir_p, storage_directory
 from hypothesis.errors import InvalidArgument
+from hypothesis.internal.intervalsets import IntervalSet
 
 intervals = Tuple[Tuple[int, int], ...]
 cache_type = Dict[Tuple[Tuple[str, ...], int, int, intervals], intervals]
@@ -146,126 +147,6 @@ def as_general_categories(cats, name="cats"):
  return tuple(c for c in cs if c in out)
 
 
-def _union_intervals(x, y):
- """Merge two sequences of intervals into a single tuple of intervals.
-
- Any integer bounded by `x` or `y` is also bounded by the result.
-
- >>> _union_intervals([(3, 10)], [(1, 2), (5, 17)])
- ((1, 17),)
- """
- if not x:
- return tuple((u, v) for u, v in y)
- if not y:
- return tuple((u, v) for u, v in x)
- intervals = sorted(x + y, reverse=True)
- result = [intervals.pop()]
- while intervals:
- # 1. intervals is in descending order
- # 2. pop() takes from the RHS.
- # 3. (a, b) was popped 1st, then (u, v) was popped 2nd
- # 4. Therefore: a <= u
- # 5. We assume that u <= v and a <= b
- # 6. So we need to handle 2 cases of overlap, and one disjoint case
- # | u--v | u----v | u--v |
- # | a----b | a--b | a--b |
- u, v = intervals.pop()
- a, b = result[-1]
- if u <= b + 1:
- # Overlap cases
- result[-1] = (a, max(v, b))
- else:
- # Disjoint case
- result.append((u, v))
- return tuple(result)
-
-
-def _subtract_intervals(x, y):
- """Set difference for lists of intervals. That is, returns a list of
- intervals that bounds all values bounded by x that are not also bounded by
- y. x and y are expected to be in sorted order.
-
- For example _subtract_intervals([(1, 10)], [(2, 3), (9, 15)]) would
- return [(1, 1), (4, 8)], removing the values 2, 3, 9 and 10 from the
- interval.
- """
- if not y:
- return tuple(x)
- x = list(map(list, x))
- i = 0
- j = 0
- result = []
- while i < len(x) and j < len(y):
- # Iterate in parallel over x and y. j stays pointing at the smallest
- # interval in the left hand side that could still overlap with some
- # element of x at index >= i.
- # Similarly, i is not incremented until we know that it does not
- # overlap with any element of y at index >= j.
-
- xl, xr = x[i]
- assert xl <= xr
- yl, yr = y[j]
- assert yl <= yr
-
- if yr < xl:
- # The interval at y[j] is strictly to the left of the interval at
- # x[i], so will not overlap with it or any later interval of x.
- j += 1
- elif yl > xr:
- # The interval at y[j] is strictly to the right of the interval at
- # x[i], so all of x[i] goes into the result as no further intervals
- # in y will intersect it.
- result.append(x[i])
- i += 1
- elif yl <= xl:
- if yr >= xr:
- # x[i] is contained entirely in y[j], so we just skip over it
- # without adding it to the result.
- i += 1
- else:
- # The beginning of x[i] is contained in y[j], so we update the
- # left endpoint of x[i] to remove this, and increment j as we
- # now have moved past it. Note that this is not added to the
- # result as is, as more intervals from y may intersect it so it
- # may need updating further.
- x[i][0] = yr + 1
- j += 1
- else:
- # yl > xl, so the left hand part of x[i] is not contained in y[j],
- # so there are some values we should add to the result.
- result.append((xl, yl - 1))
-
- if yr + 1 <= xr:
- # If y[j] finishes before x[i] does, there may be some values
- # in x[i] left that should go in the result (or they may be
- # removed by a later interval in y), so we update x[i] to
- # reflect that and increment j because it no longer overlaps
- # with any remaining element of x.
- x[i][0] = yr + 1
- j += 1
- else:
- # Every element of x[i] other than the initial part we have
- # already added is contained in y[j], so we move to the next
- # interval.
- i += 1
- # Any remaining intervals in x do not overlap with any of y, as if they did
- # we would not have incremented j to the end, so can be added to the result
- # as they are.
- result.extend(x[i:])
- return tuple(map(tuple, result))
-
-
-def _intervals(s):
- """Return a tuple of intervals, covering the codepoints of characters in
- `s`.
-
- >>> _intervals('abcdef0123456789')
- ((48, 57), (97, 102))
- """
- intervals = tuple((ord(c), ord(c)) for c in sorted(s))
- return _union_intervals(intervals, intervals)
-
-
 category_index_cache = {(): ()}
 
 
@@ -306,11 +187,14 @@ def _query_for_key(key):
  pass
  assert key
  if set(key) == set(categories()):
- result = ((0, sys.maxunicode),)
+ result = IntervalSet([(0, sys.maxunicode)])
  else:
- result = _union_intervals(_query_for_key(key[:-1]), charmap()[key[-1]])
- category_index_cache[key] = result
- return result
+ result = IntervalSet(_query_for_key(key[:-1])).union(
+ IntervalSet(charmap()[key[-1]])
+ )
+ assert isinstance(result, IntervalSet)
+ category_index_cache[key] = result.intervals
+ return result.intervals
 
 
 limited_category_index_cache: cache_type = {}
@@ -344,14 +228,14 @@ def query(
  if max_codepoint is None:
  max_codepoint = sys.maxunicode
  catkey = _category_key(exclude_categories, include_categories)
- character_intervals = _intervals(include_characters or "")
- exclude_intervals = _intervals(exclude_characters or "")
+ character_intervals = IntervalSet.from_string(include_characters or "")
+ exclude_intervals = IntervalSet.from_string(exclude_characters or "")
  qkey = (
  catkey,
  min_codepoint,
  max_codepoint,
- character_intervals,
- exclude_intervals,
+ character_intervals.intervals,
+ exclude_intervals.intervals,
  )
  try:
  return limited_category_index_cache[qkey]
@@ -362,8 +246,6 @@ def query(
  for u, v in base:
  if v >= min_codepoint and u <= max_codepoint:
  result.append((max(u, min_codepoint), min(v, max_codepoint)))
- result = tuple(result)
- result = _union_intervals(result, character_intervals)
- result = _subtract_intervals(result, exclude_intervals)
+ result = (IntervalSet(result) | character_intervals) - exclude_intervals
  limited_category_index_cache[qkey] = result
  return result
@@ -43,6 +43,15 @@
 WINDOWS = platform.system() == "Windows"
 
 
+def add_note(exc, note):
+ try:
+ exc.add_note(note)
+ except AttributeError:
+ if not hasattr(exc, "__notes__"):
+ exc.__notes__ = []
+ exc.__notes__.append(note)
+
+
 def escape_unicode_characters(s: str) -> str:
  return codecs.encode(s, "unicode_escape").decode("ascii")