|  | 
|  | 1 | +# -*- coding: utf-8 -*- | 
|  | 2 | +""" | 
|  | 3 | +Helper functions to generate range-like data for DatetimeArray | 
|  | 4 | +(and possibly TimedeltaArray/PeriodArray) | 
|  | 5 | +""" | 
|  | 6 | + | 
|  | 7 | +import numpy as np | 
|  | 8 | + | 
|  | 9 | +from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp | 
|  | 10 | + | 
|  | 11 | +from pandas.tseries.offsets import Tick, generate_range | 
|  | 12 | + | 
|  | 13 | + | 
def generate_regular_range(start, end, periods, freq):
    """
    Generate a range of dates with the spans between dates described by
    the given `freq` DateOffset.

    Parameters
    ----------
    start : Timestamp or None
        first point of produced date range
    end : Timestamp or None
        last point of produced date range
    periods : int or None
        number of periods in produced date range
    freq : DateOffset
        describes space between dates in produced date range

    Returns
    -------
    values : ndarray[np.int64]
        nanosecond unix timestamps
    tz : object
        the ``tz`` attribute of `start` when `start` is used, otherwise
        the ``tz`` attribute of `end`; None when `freq` is not a Tick and
        neither endpoint is given

    Raises
    ------
    ValueError
        If `freq` is a Tick, `periods` is given and both `start` and
        `end` are None.
    """
    if isinstance(freq, Tick):
        stride = freq.nanos
        if periods is None:
            b = Timestamp(start).value
            # cannot just use e = Timestamp(end) + 1 because arange breaks when
            # stride is too large, see GH10887
            e = (b + (Timestamp(end).value - b) // stride * stride +
                 stride // 2 + 1)
            # end.tz == start.tz by this point due to _generate implementation
            tz = start.tz
        elif start is not None:
            b = Timestamp(start).value
            e = _generate_range_overflow_safe(b, periods, stride, side='start')
            tz = start.tz
        elif end is not None:
            # the stop value is exclusive, so step one stride past `end`
            e = Timestamp(end).value + stride
            b = _generate_range_overflow_safe(e, periods, stride, side='end')
            tz = end.tz
        else:
            raise ValueError("at least 'start' or 'end' should be specified "
                             "if a 'period' is given.")

        with np.errstate(over="raise"):
            # If the range is sufficiently large, np.arange may overflow
            # and incorrectly return an empty array if not caught.
            try:
                values = np.arange(b, e, stride, dtype=np.int64)
            except FloatingPointError:
                # Build the points by hand.  Stop as soon as the running
                # value reaches *or passes* `e`: an equality test would
                # never terminate when `e - b` is not an exact multiple of
                # `stride` (which is the normal situation when `periods`
                # is None, because of the `stride // 2 + 1` padding).
                xdr = [b]
                if stride > 0:
                    while xdr[-1] < e:
                        xdr.append(xdr[-1] + stride)
                elif stride < 0:
                    while xdr[-1] > e:
                        xdr.append(xdr[-1] + stride)
                # the last element is at or beyond the exclusive stop
                values = np.array(xdr[:-1], dtype=np.int64)

    else:
        tz = None
        # start and end should have the same timezone by this point
        if start is not None:
            tz = start.tz
        elif end is not None:
            tz = end.tz

        xdr = generate_range(start=start, end=end,
                             periods=periods, offset=freq)

        values = np.array([x.value for x in xdr], dtype=np.int64)

    return values, tz
|  | 81 | + | 
|  | 82 | + | 
def _generate_range_overflow_safe(endpoint, periods, stride, side='start'):
    """
    Compute the opposite endpoint of a range for use with np.arange,
    refusing to wrap around silently when the arithmetic would overflow.

    Parameters
    ----------
    endpoint : int
        nanosecond timestamp of the known endpoint of the desired range
    periods : int
        number of periods in the desired range
    stride : int
        nanoseconds between periods in the desired range
    side : {'start', 'end'}
        which end of the range `endpoint` refers to

    Returns
    -------
    other_end : int

    Raises
    ------
    OutOfBoundsDatetime
        If ``periods * abs(stride)`` does not fit in uint64, or if the
        signs of `endpoint` and `stride` make an overflow unavoidable.
    """
    # GH#14187 raise instead of incorrectly wrapping around
    assert side in ['start', 'end']

    int64_ceiling = np.uint64(np.iinfo(np.int64).max)
    error_text = ('Cannot generate range with {side}={endpoint} and '
                  'periods={periods}'
                  .format(side=side, endpoint=endpoint, periods=periods))

    with np.errstate(over="raise"):
        # a product that does not even fit in *uint64* cannot be rescued
        # by splitting the range below, so give up immediately
        try:
            span = np.uint64(periods) * np.uint64(np.abs(stride))
        except FloatingPointError:
            raise OutOfBoundsDatetime(error_text)

    if np.abs(span) <= int64_ceiling:
        # the whole offset fits in int64: no unsigned casting concerns
        return _generate_range_overflow_safe_signed(
            endpoint, periods, stride, side)

    moving_away_from_zero = ((endpoint > 0 and side == 'start') or
                             (endpoint < 0 and side == 'end'))
    if stride > 0 and moving_away_from_zero:
        # the offset alone exceeds int64 and pushes further out of bounds
        raise OutOfBoundsDatetime(error_text)

    if (side == 'end' and endpoint > int64_ceiling and
            endpoint - stride <= int64_ceiling):
        # the caller added `stride` to the end point, which pushed it past
        # the int64 bounds.  Undo that step and ask for one period fewer.
        return _generate_range_overflow_safe(endpoint - stride,
                                             periods - 1, stride, side)

    # divide and conquer: walk half of the periods, then the rest
    first_half = periods // 2
    second_half = periods - first_half
    assert 0 < second_half < periods, (second_half, periods, endpoint, stride)

    halfway = _generate_range_overflow_safe(endpoint, first_half,
                                            stride, side)
    return _generate_range_overflow_safe(halfway, second_half, stride, side)
|  | 148 | + | 
|  | 149 | + | 
def _generate_range_overflow_safe_signed(endpoint, periods, stride, side):
    """
    Variant of _generate_range_overflow_safe for the case where
    `periods * stride` itself fits within int64 bounds.

    Returns ``endpoint + periods * stride`` for side='start' and
    ``endpoint - periods * stride`` for side='end'.  If that sum overflows
    int64 by no more than one positive stride, it is returned as a uint64;
    any other overflow raises OutOfBoundsDatetime.
    """
    assert side in ['start', 'end']
    # walking back from the end is the same as stepping with a negated stride
    step = stride * -1 if side == 'end' else stride

    with np.errstate(over="raise"):
        delta = np.int64(periods) * np.int64(step)
        try:
            # common case: the sum fits in int64
            return np.int64(endpoint) + delta
        except (FloatingPointError, OverflowError):
            # fall through to the special-case handling below
            pass

        # an endpoint and a step of opposite signs cannot overflow when
        # added, so reaching this point means they point the same way
        same_direction = ((step > 0 and endpoint >= 0) or
                          (step < 0 and endpoint <= 0))
        assert same_direction

        if step > 0:
            # very special case: the result lies slightly beyond the
            # implementation bounds, but np.arange treats it as an
            # exclusive stop and so still yields values within the bounds
            assert endpoint >= 0
            ceiling = np.uint64(np.iinfo(np.int64).max)
            unsigned_total = np.uint64(endpoint) + np.uint64(delta)
            assert unsigned_total > ceiling
            if unsigned_total <= ceiling + np.uint64(step):
                return unsigned_total

    raise OutOfBoundsDatetime('Cannot generate range with '
                              '{side}={endpoint} and periods={periods}'
                              .format(side=side, endpoint=endpoint,
                                      periods=periods))
0 commit comments