Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 6 additions & 32 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,11 @@ from pandas._libs import tslib, algos, hashtable as _hash
from pandas._libs.tslib import Timestamp, Timedelta
from datetime import datetime, timedelta

from datetime cimport (get_datetime64_value, _pydatetime_to_dts,
pandas_datetimestruct)

from cpython cimport PyTuple_Check, PyList_Check

cdef extern from "datetime.h":
bint PyDateTime_Check(object o)
void PyDateTime_IMPORT()

cdef int64_t iNaT = util.get_nat()


PyDateTime_IMPORT

cdef extern from "Python.h":
int PySlice_Check(object)

Expand Down Expand Up @@ -415,12 +406,12 @@ cdef class DatetimeEngine(Int64Engine):
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
conv = _to_i8(val)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isn't this a cimport?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment `_to_i8` is `cdef`'d in `_libs.index`. This PR replaces it with a call to `tslib.pydt_to_i8`. As noted in the OP, `pydt_to_i8` is currently a plain `def`, but this PR makes it `cpdef` so that the C version can be called here (`tslib` is cimported into `_libs.index`).

conv = tslib.pydt_to_i8(val)
loc = values.searchsorted(conv, side='left')
return util.get_value_at(values, loc) == conv

self._ensure_mapping_populated()
return _to_i8(val) in self.mapping
return tslib.pydt_to_i8(val) in self.mapping

cdef _get_index_values(self):
return self.vgetter().view('i8')
Expand All @@ -435,12 +426,12 @@ cdef class DatetimeEngine(Int64Engine):
# Welcome to the spaghetti factory
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
val = _to_i8(val)
val = tslib.pydt_to_i8(val)
return self._get_loc_duplicates(val)
values = self._get_index_values()

try:
conv = _to_i8(val)
conv = tslib.pydt_to_i8(val)
loc = values.searchsorted(conv, side='left')
except TypeError:
self._date_check_type(val)
Expand All @@ -452,7 +443,7 @@ cdef class DatetimeEngine(Int64Engine):

self._ensure_mapping_populated()
if not self.unique:
val = _to_i8(val)
val = tslib.pydt_to_i8(val)
return self._get_loc_duplicates(val)

try:
Expand All @@ -463,7 +454,7 @@ cdef class DatetimeEngine(Int64Engine):
pass

try:
val = _to_i8(val)
val = tslib.pydt_to_i8(val)
return self.mapping.get_item(val)
except (TypeError, ValueError):
self._date_check_type(val)
Expand Down Expand Up @@ -540,23 +531,6 @@ cpdef convert_scalar(ndarray arr, object value):

return value

cdef inline _to_i8(object val):
    """
    Coerce a datetime-like scalar to its i8 value.

    Resolution order:

    1. If ``val`` has a ``value`` attribute, that attribute is returned.
    2. If ``val`` is a numpy datetime64 scalar, the result of
       ``get_datetime64_value`` is returned.
    3. If ``val`` is a Python datetime, it is converted with
       ``_pydatetime_to_dts``; for a tz-aware, non-UTC datetime the
       utcoffset (converted by ``tslib._delta_to_nanoseconds``) is
       subtracted from the result.
    4. Anything else is returned unchanged.
    """
    cdef pandas_datetimestruct dts

    try:
        return val.value
    except AttributeError:
        pass

    if util.is_datetime64_object(val):
        return get_datetime64_value(val)

    if not PyDateTime_Check(val):
        # Not datetime-like: hand the object back untouched.
        return val

    tz = getattr(val, 'tzinfo', None)
    # ``val`` itself is kept around because the utcoffset lookup needs the
    # original datetime, not just the converted integer.
    stamp = _pydatetime_to_dts(val, &dts)
    if tz is not None and not is_utc(tz):
        stamp -= tslib._delta_to_nanoseconds(get_utcoffset(tz, val))
    return stamp


cdef class MultiIndexObjectEngine(ObjectEngine):
"""
Expand Down
57 changes: 0 additions & 57 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ cdef double NaN = <double> np.NaN
cdef double nan = NaN
cdef double NAN = nan

from datetime import datetime as pydatetime

# this is our tseries.pxd
from datetime cimport (
get_timedelta64_value, get_datetime64_value,
Expand Down Expand Up @@ -132,61 +130,6 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
s += arr[i].__sizeof__()
return s

#----------------------------------------------------------------------
# datetime / io related

# Proleptic Gregorian ordinal of 1970-01-01, i.e. date(1970, 1, 1).toordinal().
# Subtracted from a date's ordinal to get a day count relative to the epoch.
cdef int _EPOCH_ORD = 719163

from datetime import date as pydate

cdef inline int64_t gmtime(object date):
    """
    Convert a datetime's calendar fields to milliseconds relative to
    ``_EPOCH_ORD``.

    Only year, month, day, hour, minute and second are read; sub-second
    fields and any tzinfo on ``date`` are not consulted here.

    Parameters
    ----------
    date : datetime.datetime
        Object accepted by the ``PyDateTime_*`` accessor macros.

    Returns
    -------
    int64_t
        ``((days * 24 + h) * 60 + mn) * 60 + s`` seconds, times 1000.
    """
    cdef int y, m, d, h, mn, s
    # ``days`` is 64-bit so the whole expression below is evaluated in
    # int64. With a 32-bit ``days`` the minutes sub-expression overflowed
    # for far-future dates before the cast to int64 was applied.
    cdef int64_t days

    y = PyDateTime_GET_YEAR(date)
    m = PyDateTime_GET_MONTH(date)
    d = PyDateTime_GET_DAY(date)
    h = PyDateTime_DATE_GET_HOUR(date)
    mn = PyDateTime_DATE_GET_MINUTE(date)
    s = PyDateTime_DATE_GET_SECOND(date)

    # Ordinal of the first of the month, shifted to the epoch, plus the
    # zero-based day of the month.
    days = pydate(y, m, 1).toordinal() - _EPOCH_ORD + d - 1
    return (((days * 24 + h) * 60 + mn) * 60 + s) * 1000


cpdef object to_datetime(int64_t timestamp):
    """
    Build a naive datetime (UTC fields) from a millisecond timestamp.

    The integer is divided by 1000.0 to obtain seconds and passed to
    ``datetime.utcfromtimestamp``.
    """
    cdef double seconds = timestamp / 1000.0
    return pydatetime.utcfromtimestamp(seconds)


cpdef object to_timestamp(object dt):
    """
    Python-visible wrapper around ``gmtime``: return the integer that
    ``gmtime`` computes for ``dt``.
    """
    cdef int64_t millis = gmtime(dt)
    return millis


def array_to_timestamp(ndarray[object, ndim=1] arr):
    """
    Apply ``gmtime`` to every element of a 1-d object array.

    Parameters
    ----------
    arr : ndarray[object, ndim=1]
        Elements must be acceptable to ``gmtime``.

    Returns
    -------
    ndarray[int64_t, ndim=1]
        Same length as ``arr``; element ``i`` is ``gmtime(arr[i])``.
    """
    # Py_ssize_t rather than int so the length and index are not truncated
    # for arrays with more than 2**31 - 1 elements.
    cdef Py_ssize_t i, n
    cdef ndarray[int64_t, ndim=1] result

    n = len(arr)
    result = np.empty(n, dtype=np.int64)

    for i in range(n):
        result[i] = gmtime(arr[i])

    return result


def time64_to_datetime(ndarray[int64_t, ndim=1] arr):
    """
    Apply ``to_datetime`` to every element of a 1-d int64 array.

    Parameters
    ----------
    arr : ndarray[int64_t, ndim=1]
        Integer timestamps in the unit expected by ``to_datetime``.

    Returns
    -------
    ndarray[object, ndim=1]
        Same length as ``arr``; element ``i`` is ``to_datetime(arr[i])``.
    """
    # Py_ssize_t rather than int so the length and index are not truncated
    # for arrays with more than 2**31 - 1 elements.
    cdef Py_ssize_t i, n
    cdef ndarray[object, ndim=1] result

    n = len(arr)
    result = np.empty(n, dtype=object)

    for i in range(n):
        result[i] = to_datetime(arr[i])

    return result


#----------------------------------------------------------------------
# isnull / notnull related

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3436,7 +3436,7 @@ def cast_to_nanoseconds(ndarray arr):
return result


def pydt_to_i8(object pydt):
cpdef pydt_to_i8(object pydt):
"""
Convert to int64 representation compatible with numpy datetime64; converts
to UTC
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import numpy as np
from pandas import (Series, DataFrame, Panel, Panel4D, Index,
MultiIndex, Int64Index, isna, concat,
MultiIndex, Int64Index, isna, concat, to_datetime,
SparseSeries, SparseDataFrame, PeriodIndex,
DatetimeIndex, TimedeltaIndex)
from pandas.core import config
Expand Down Expand Up @@ -4529,7 +4529,7 @@ def _unconvert_index(data, kind, encoding=None):
def _unconvert_index_legacy(data, kind, legacy=False, encoding=None):
kind = _ensure_decoded(kind)
if kind == u('datetime'):
index = lib.time64_to_datetime(data)
index = to_datetime(data)
elif kind in (u('integer')):
index = np.asarray(data, dtype=object)
elif kind in (u('string')):
Expand Down