pandas-dev · jreback · Aug 14, 2018 · Aug 2, 2018 · Aug 2, 2018 · Aug 2, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -216,7 +216,7 @@ New Behavior:
  idx = pd.interval_range(0, 4)
  idx.values
 
-This mirrors ``CateogricalIndex.values``, which returns a ``Categorical``.
+This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``.
 
 For situations where you need an ``ndarray`` of ``Interval`` objects, use
 :meth:`numpy.asarray` or ``idx.astype(object)``.
@@ -406,6 +406,34 @@ Previous Behavior:
  In [3]: pi - pi[0]
  Out[3]: Int64Index([0, 1, 2], dtype='int64')
 
+
+.. _whatsnew_0240.api.timedelta64_subtract_nan:
+
+Addition/Subtraction of ``NaN`` from :class:`DataFrame`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Adding or subtracting ``NaN`` from a :class:`DataFrame` column with
+``timedelta64[ns]`` dtype will now raise a ``TypeError`` instead of returning
+all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and
+``Series`` behavior (:issue:`22163`).
+
+.. ipython:: python
+
+ df = pd.DataFrame([pd.Timedelta(days=1)])
+ df - np.nan
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+ In [4]: df = pd.DataFrame([pd.Timedelta(days=1)])
+
+ In [5]: df - np.nan
+ Out[5]:
+ 0
+ 0 NaT
+
+
 .. _whatsnew_0240.api.extension:
 
 ExtensionType Changes
@@ -538,6 +566,16 @@ Datetimelike
 - Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`)
 - Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`)
 - Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`, :issue:`22163`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`, :issue:`22163`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`, :issue:`22163`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`, :issue:`22163`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`, :issue:`22163`)
+- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`, :issue:`22163`)
+- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`, :issue:`22163`)
+- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`, :issue:`22163`)
+- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`, :issue:`22163`)
+-
 
 Timedelta
 ^^^^^^^^^
@@ -585,6 +623,7 @@ Numeric
  when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``),
  a ``TypeError`` was wrongly raised. For all three methods such calculation are now done correctly. (:issue:`16679`).
 - Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`)
+- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`, :issue:`22163`)
 -
 
 Strings

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4936,6 +4936,14 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
  return self._constructor(new_data)
 
  def _combine_const(self, other, func, errors='raise', try_cast=True):
+ if lib.is_scalar(other) or np.ndim(other) == 0:
+ new_data = {i: func(self.iloc[:, i], other)
+ for i, col in enumerate(self.columns)}
+
+ result = self._constructor(new_data, index=self.index, copy=False)
+ result.columns = self.columns
+ return result
+
  new_data = self._data.eval(func=func, other=other,
  errors=errors,
  try_cast=try_cast)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -1350,7 +1350,7 @@ def na_op(x, y):
  with np.errstate(all='ignore'):
  result = method(y)
  if result is NotImplemented:
- raise TypeError("invalid type comparison")
+ return invalid_comparison(x, y, op)
  else:
  result = op(x, y)
 
@@ -1366,6 +1366,10 @@ def wrapper(self, other, axis=None):
 
  res_name = get_op_result_name(self, other)
 
+ if isinstance(other, list):
+ # TODO: same for tuples?
+ other = np.asarray(other)
+
  if isinstance(other, ABCDataFrame): # pragma: no cover
  # Defer to DataFrame implementation; fail early
  return NotImplemented
@@ -1459,8 +1463,6 @@ def wrapper(self, other, axis=None):
 
  else:
  values = self.get_values()
- if isinstance(other, list):
- other = np.asarray(other)
 
  with np.errstate(all='ignore'):
  res = na_op(values, other)
@@ -1741,7 +1743,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
  if fill_value is not None:
  self = self.fillna(fill_value)
 
- return self._combine_const(other, na_op, try_cast=True)
+ pass_op = op if lib.is_scalar(other) else na_op
+ return self._combine_const(other, pass_op, try_cast=True)
 
  f.__name__ = op_name
 

diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
@@ -63,6 +63,15 @@ def test_tz_aware_scalar_comparison(self, timestamps):
  expected = pd.DataFrame({'test': [False, False]})
  tm.assert_frame_equal(df == -1, expected)
 
+ def test_dt64_nat_comparison(self):
+ # GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly
+ ts = pd.Timestamp.now()
+ df = pd.DataFrame([ts, pd.NaT])
+ expected = pd.DataFrame([True, False])
+
+ result = df == ts
+ tm.assert_frame_equal(result, expected)
+
 
 class TestDatetime64SeriesComparison(object):
  # TODO: moved from tests.series.test_operators; needs cleanup
@@ -640,10 +649,22 @@ def test_dti_cmp_object_dtype(self):
 # Arithmetic
 
 class TestFrameArithmetic(object):
+ def test_dt64arr_sub_dtscalar(self, box):
+ # GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype
+ idx = pd.date_range('2013-01-01', periods=3)
+ idx = tm.box_expected(idx, box)
+
+ ts = pd.Timestamp('2013-01-01')
+ # TODO: parametrize over scalar types
+
+ expected = pd.TimedeltaIndex(['0 Days', '1 Day', '2 Days'])
+ expected = tm.box_expected(expected, box)
+
+ result = idx - ts
+ tm.assert_equal(result, expected)
 
- @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano',
- strict=True)
  def test_df_sub_datetime64_not_ns(self):
+ # GH#7996, GH#22163 ensure non-nano datetime64 is converted to nano
  df = pd.DataFrame(pd.date_range('20130101', periods=3))
  dt64 = np.datetime64('2013-01-01')
  assert dt64.dtype == 'datetime64[D]'
@@ -992,9 +1013,11 @@ def test_dti_add_sub_float(self, op, other):
  with pytest.raises(TypeError):
  op(dti, other)
 
- def test_dti_add_timestamp_raises(self):
+ def test_dti_add_timestamp_raises(self, box):
+ # GH#22163 ensure DataFrame doesn't cast Timestamp to i8
  idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
- msg = "cannot add DatetimeIndex and Timestamp"
+ idx = tm.box_expected(idx, box)
+ msg = "cannot add"
  with tm.assert_raises_regex(TypeError, msg):
  idx + Timestamp('2011-01-01')
 
@@ -1090,13 +1113,17 @@ def test_dti_add_intarray_no_freq(self, box):
  # -------------------------------------------------------------
  # Binary operations DatetimeIndex and timedelta-like
 
- def test_dti_add_timedeltalike(self, tz_naive_fixture, delta):
+ def test_dti_add_timedeltalike(self, tz_naive_fixture, delta, box):
+ # GH#22005, GH#22163 check DataFrame doesn't raise TypeError
  tz = tz_naive_fixture
  rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
+ rng = tm.box_expected(rng, box)
+
  result = rng + delta
  expected = pd.date_range('2000-01-01 02:00',
  '2000-02-01 02:00', tz=tz)
- tm.assert_index_equal(result, expected)
+ expected = tm.box_expected(expected, box)
+ tm.assert_equal(result, expected)
 
  def test_dti_iadd_timedeltalike(self, tz_naive_fixture, delta):
  tz = tz_naive_fixture
@@ -1662,14 +1689,8 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names):
  res3 = dti - other
  tm.assert_series_equal(res3, expected_sub)
 
- @pytest.mark.parametrize('box', [
- pd.Index,
- pd.Series,
- pytest.param(pd.DataFrame,
- marks=pytest.mark.xfail(reason="Returns object dtype",
- strict=True))
- ], ids=lambda x: x.__name__)
  def test_dti_add_offset_tzaware(self, tz_aware_fixture, box):
+ # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype
  timezone = tz_aware_fixture
  if timezone == 'US/Pacific':
  dates = date_range('2012-11-01', periods=3, tz=timezone)

diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -58,13 +58,6 @@ def test_ops_series(self):
  tm.assert_series_equal(expected, td * other)
  tm.assert_series_equal(expected, other * td)
 
- @pytest.mark.parametrize('box', [
- pd.Index,
- Series,
- pytest.param(pd.DataFrame,
- marks=pytest.mark.xfail(reason="block.eval incorrect",
- strict=True))
- ])
  @pytest.mark.parametrize('index', [
  pd.Int64Index(range(1, 11)),
  pd.UInt64Index(range(1, 11)),
@@ -79,7 +72,7 @@ def test_ops_series(self):
  def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
  # GH#19333
 
- if (box is Series and
+ if (box in [Series, pd.DataFrame] and
  type(scalar_td) is timedelta and index.dtype == 'f8'):
  raise pytest.xfail(reason="Cannot multiply timedelta by float")
 

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -17,6 +17,53 @@
 # Comparisons
 
 class TestFrameComparisons(object):
+ def test_flex_comparison_nat(self):
+ # GH#15697, GH#22163 df.eq(pd.NaT) should behave like df == pd.NaT,
+ # and _definitely_ not be NaN
+ df = pd.DataFrame([pd.NaT])
+
+ result = df == pd.NaT
+ # result.iloc[0, 0] is a np.bool_ object
+ assert result.iloc[0, 0].item() is False
+
+ result = df.eq(pd.NaT)
+ assert result.iloc[0, 0].item() is False
+
+ result = df != pd.NaT
+ assert result.iloc[0, 0].item() is True
+
+ result = df.ne(pd.NaT)
+ assert result.iloc[0, 0].item() is True
+
+ def test_mixed_comparison(self):
+ # GH#13128, GH#22163 datetime64 vs non-dt64: == should be all-False
+ # and != should be all-True, not raise TypeError
+ # (this appears to be fixed before #22163, not sure when)
+ df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]])
+ other = pd.DataFrame([['a', 'b'], ['c', 'd']])
+
+ result = df == other
+ assert not result.any().any()
+
+ result = df != other
+ assert result.all().all()
+
+ def test_df_numeric_cmp_dt64_raises(self):
+ # GH#8932, GH#22163
+ ts = pd.Timestamp.now()
+ df = pd.DataFrame({'x': range(5)})
+ with pytest.raises(TypeError):
+ df > ts
+ with pytest.raises(TypeError):
+ df < ts
+ with pytest.raises(TypeError):
+ ts < df
+ with pytest.raises(TypeError):
+ ts > df
+
+ assert not (df == ts).any().any()
+ assert (df != ts).all().all()
+
  def test_df_boolean_comparison_error(self):
  # GH#4576
  # boolean comparisons with a tuple/list give unexpected results
@@ -32,8 +79,8 @@ def test_df_float_none_comparison(self):
  df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
  columns=['A', 'B', 'C'])
 
- with pytest.raises(TypeError):
-  df.__eq__(None)
+ result = df.__eq__(None)
+ assert not result.any().any()
 
  def test_df_string_comparison(self):
  df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
@@ -251,3 +298,20 @@ def test_arith_flex_zero_len_raises(self):
 
  with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
  df_len0.sub(df['A'], axis=None, fill_value=3)
+
+
+class TestFrameArithmetic(object):
+ def test_df_bool_mul_int(self):
+ # GH#22047, GH#22163 multiplication by 1 should result in int dtype,
+ # not object dtype
+ df = pd.DataFrame([[False, True], [False, False]])
+ result = df * 1
+
+ # On appveyor this comes back as np.int32 instead of np.int64,
+ # so we check dtype.kind instead of just dtype
+ kinds = result.dtypes.apply(lambda x: x.kind)
+ assert (kinds == 'i').all()
+
+ result = 1 * df
+ kinds = result.dtypes.apply(lambda x: x.kind)
+ assert (kinds == 'i').all()
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -273,6 +273,8 @@ def test_getitem_boolean(self):
  # test df[df > 0]
  for df in [self.tsframe, self.mixed_frame,
  self.mixed_float, self.mixed_int]:
+ if compat.PY3 and df is self.mixed_frame:
+ continue
 
  data = df._get_numeric_data()
  bif = df[df > 0]
@@ -2468,8 +2470,11 @@ def test_boolean_indexing_mixed(self):
  assert_frame_equal(df2, expected)
 
  df['foo'] = 'test'
- with tm.assert_raises_regex(TypeError, 'boolean setting '
- 'on mixed-type'):
+ msg = ("boolean setting on mixed-type|"
+ "not supported between|"
+ "unorderable types")
+ with tm.assert_raises_regex(TypeError, msg):
+ # TODO: This message should be the same in PY2/PY3
  df[df > 0.3] = 1
 
  def test_where(self):
@@ -2502,6 +2507,10 @@ def _check_get(df, cond, check_dtypes=True):
  # check getting
  for df in [default_frame, self.mixed_frame,
  self.mixed_float, self.mixed_int]:
+ if compat.PY3 and df is self.mixed_frame:
+ with pytest.raises(TypeError):
+ df > 0
+ continue
  cond = df > 0
  _check_get(df, cond)
 
@@ -2549,6 +2558,10 @@ def _check_align(df, cond, other, check_dtypes=True):
  assert (rs.dtypes == df.dtypes).all()
 
  for df in [self.mixed_frame, self.mixed_float, self.mixed_int]:
+ if compat.PY3 and df is self.mixed_frame:
+ with pytest.raises(TypeError):
+ df > 0
+ continue
 
  # other is a frame
  cond = (df > 0)[1:]
@@ -2594,6 +2607,10 @@ def _check_set(df, cond, check_dtypes=True):
 
  for df in [default_frame, self.mixed_frame, self.mixed_float,
  self.mixed_int]:
+ if compat.PY3 and df is self.mixed_frame:
+ with pytest.raises(TypeError):
+ df > 0
+ continue
 
  cond = df > 0
  _check_set(df, cond)
@@ -2759,9 +2776,14 @@ def test_where_datetime(self):
  C=np.random.randn(5)))
 
  stamp = datetime(2013, 1, 3)
- result = df[df > stamp]
+ with pytest.raises(TypeError):
+ df > stamp
+
+ result = df[df.iloc[:, :-1] > stamp]
+
  expected = df.copy()
  expected.loc[[0, 1], 'A'] = np.nan
+ expected.loc[:, 'C'] = np.nan
  assert_frame_equal(result, expected)
 
  def test_where_none(self):