-
- Notifications
You must be signed in to change notification settings - Fork 19.2k
Last of the timezones funcs #17669
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Last of the timezones funcs #17669
Changes from 3 commits
1932996 6ccbfb2 adf9099 fcd26c1 355cbe8 e17089c b82396d e6096c5 926bb73 2afc7b2 8f4b368 7b4c9b2 0248f53 ce701e5 File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -105,7 +105,7 @@ from tslibs.timezones cimport ( | |
| is_utc, is_tzlocal, is_fixed_offset, | ||
| treat_tz_as_dateutil, treat_tz_as_pytz, | ||
| get_timezone, get_utcoffset, maybe_get_tz, | ||
| get_dst_info | ||
| get_dst_info, _infer_dst | ||
| ) | ||
| | ||
| | ||
| | @@ -4003,48 +4003,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
| result_b[i] = v | ||
| | ||
| if infer_dst: | ||
| dst_hours = np.empty(n, dtype=np.int64) | ||
| dst_hours.fill(NPY_NAT) | ||
| | ||
| # Get the ambiguous hours (given the above, these are the hours | ||
| # where result_a != result_b and neither of them are NAT) | ||
| both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) | ||
| both_eq = result_a == result_b | ||
| trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) | ||
| if trans_idx.size == 1: | ||
| stamp = Timestamp(vals[trans_idx]) | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Revert this routine. | ||
| raise pytz.AmbiguousTimeError( | ||
| "Cannot infer dst time from %s as there " | ||
| "are no repeated times" % stamp) | ||
| # Split the array into contiguous chunks (where the difference between | ||
| # indices is 1). These are effectively dst transitions in different | ||
| # years which is useful for checking that there is not an ambiguous | ||
| # transition in an individual year. | ||
| if trans_idx.size > 0: | ||
| one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 | ||
| trans_grp = np.array_split(trans_idx, one_diff) | ||
| | ||
| # Iterate through each day, if there are no hours where the | ||
| # delta is negative (indicates a repeat of hour) the switch | ||
| # cannot be inferred | ||
| for grp in trans_grp: | ||
| | ||
| delta = np.diff(result_a[grp]) | ||
| if grp.size == 1 or np.all(delta > 0): | ||
| stamp = Timestamp(vals[grp[0]]) | ||
| raise pytz.AmbiguousTimeError(stamp) | ||
| | ||
| # Find the index for the switch and pull from a for dst and b | ||
| # for standard | ||
| switch_idx = (delta <= 0).nonzero()[0] | ||
| if switch_idx.size > 1: | ||
| raise pytz.AmbiguousTimeError( | ||
| "There are %i dst switches when " | ||
| "there should only be 1." % switch_idx.size) | ||
| switch_idx = switch_idx[0] + 1 # Pull the only index and adjust | ||
| a_idx = grp[:switch_idx] | ||
| b_idx = grp[switch_idx:] | ||
| dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) | ||
| dst_hours = _infer_dst(vals, result_a, result_b) | ||
| | ||
| for i in range(n): | ||
| left = result_a[i] | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -275,3 +275,84 @@ cdef object get_dst_info(object tz): | |
| dst_cache[cache_key] = (trans, deltas, typ) | ||
| | ||
| return dst_cache[cache_key] | ||
| | ||
| | ||
| def _infer_tzinfo(start, end): | ||
| ||
| def _infer(a, b): | ||
| tz = a.tzinfo | ||
| ||
| if b and b.tzinfo: | ||
| if not (get_timezone(tz) == get_timezone(b.tzinfo)): | ||
| raise AssertionError('Inputs must both have the same timezone,' | ||
| ' {timezone1} != {timezone2}' | ||
| .format(timezone1=tz, timezone2=b.tzinfo)) | ||
| return tz | ||
| | ||
| tz = None | ||
| if start is not None: | ||
| tz = _infer(start, end) | ||
| elif end is not None: | ||
| tz = _infer(end, start) | ||
| return tz | ||
| | ||
| | ||
| cdef ndarray[int64_t] _infer_dst(ndarray[int64_t] vals, | ||
| ||
| ndarray[int64_t] result_a, | ||
| ndarray[int64_t] result_b): | ||
| cdef: | ||
| Py_ssize_t n = len(vals) | ||
| ndarray[int64_t] dst_hours | ||
| ||
| | ||
| dst_hours = np.empty(n, dtype=np.int64) | ||
| dst_hours.fill(NPY_NAT) | ||
| | ||
| # Get the ambiguous hours (given the above, these are the hours | ||
| # where result_a != result_b and neither of them are NAT) | ||
| both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) | ||
| both_eq = result_a == result_b | ||
| trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) | ||
| if trans_idx.size == 1: | ||
| stamp = np.int64(vals[trans_idx]).astype('datetime64[ns]') | ||
| # Render `stamp` as e.g. '2017-08-30 07:59:23.123456' | ||
| # as opposed to str(stamp) which would | ||
| # be '2017-08-30T07:59:23.123456789' | ||
| stamp = str(stamp).replace('T', ' ')[:-3] | ||
| raise pytz.AmbiguousTimeError( | ||
| "Cannot infer dst time from %s as there " | ||
| "are no repeated times" % stamp) | ||
| | ||
| # Split the array into contiguous chunks (where the difference between | ||
| # indices is 1). These are effectively dst transitions in different | ||
| # years which is useful for checking that there is not an ambiguous | ||
| # transition in an individual year. | ||
| if trans_idx.size > 0: | ||
| one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 | ||
| trans_grp = np.array_split(trans_idx, one_diff) | ||
| | ||
| # Iterate through each day, if there are no hours where the | ||
| # delta is negative (indicates a repeat of hour) the switch | ||
| # cannot be inferred | ||
| for grp in trans_grp: | ||
| | ||
| delta = np.diff(result_a[grp]) | ||
| if grp.size == 1 or np.all(delta > 0): | ||
| stamp = np.int64(vals[grp[0]]).astype('datetime64[ns]') | ||
| # Render `stamp` as e.g. '2017-08-30 07:59:23.123456' | ||
| # as opposed to str(stamp) which would | ||
| # be '2017-08-30T07:59:23.123456789' | ||
| stamp = str(stamp).replace('T', ' ')[:-3] | ||
| raise pytz.AmbiguousTimeError(stamp) | ||
| | ||
| # Find the index for the switch and pull from a for dst and b | ||
| # for standard | ||
| switch_idx = (delta <= 0).nonzero()[0] | ||
| if switch_idx.size > 1: | ||
| raise pytz.AmbiguousTimeError( | ||
| "There are %i dst switches when " | ||
| "there should only be 1." % switch_idx.size) | ||
| | ||
| switch_idx = switch_idx[0] + 1 # Pull the only index and adjust | ||
| a_idx = grp[:switch_idx] | ||
| b_idx = grp[switch_idx:] | ||
| dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) | ||
| | ||
| return dst_hours | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -4,7 +4,7 @@ | |
| | ||
| from pandas._libs import lib, tslib | ||
| from pandas._libs.tslibs.strptime import array_strptime | ||
| from pandas._libs.tslibs.timezones import get_timezone | ||
| from pandas._libs.tslibs.timezones import get_timezone, _infer_tzinfo # noqa | ||
| | ||
| from pandas.core.dtypes.common import ( | ||
| _ensure_object, | ||
| | @@ -42,22 +42,6 @@ def _lexer_split_from_str(dt_str): | |
| pass | ||
| | ||
| | ||
| def _infer_tzinfo(start, end): | ||
| def _infer(a, b): | ||
| tz = a.tzinfo | ||
| if b and b.tzinfo: | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where is this actually used? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Outside of tests, it's used once in | ||
| if not (get_timezone(tz) == get_timezone(b.tzinfo)): | ||
| raise AssertionError('Inputs must both have the same timezone,' | ||
| ' {timezone1} != {timezone2}' | ||
| .format(timezone1=tz, timezone2=b.tzinfo)) | ||
| return tz | ||
| | ||
| tz = None | ||
| if start is not None: | ||
| tz = _infer(start, end) | ||
| elif end is not None: | ||
| tz = _infer(end, start) | ||
| return tz | ||
| | ||
| | ||
| def _guess_datetime_format(dt_str, dayfirst=False, | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's give this a more descriptive name: `infer_dst_transitions`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure