Skip to content

Commit 169e833

Browse files
committed
Merge remote-tracking branch 'upstream/master' into api-doc-methods
2 parents ba257cc + 63e8527 commit 169e833

File tree

11 files changed

+188
-76
lines changed

11 files changed

+188
-76
lines changed

doc/source/whatsnew/v0.21.1.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Conversion
7373
Indexing
7474
^^^^^^^^
7575

76+
- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`)
7677
- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`)
7778
-
7879
-
@@ -85,6 +86,7 @@ I/O
8586
- Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`)
8687
- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`)
8788
- :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`)
89+
- :func:`read_parquet` now allows specifying kwargs which are passed to the respective engine (:issue:`18216`)
8890

8991
Plotting
9092
^^^^^^^^

doc/source/whatsnew/v0.22.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ Removal of prior version deprecations/changes
6262
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6363

6464
- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`)
65-
-
65+
- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`).
6666
-
6767

6868
.. _whatsnew_0220.performance:

pandas/_libs/index.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ from hashtable cimport HashTable
1919

2020
from pandas._libs import algos, period as periodlib, hashtable as _hash
2121
from pandas._libs.tslib import Timestamp, Timedelta
22-
from datetime import datetime, timedelta
22+
from datetime import datetime, timedelta, date
2323

2424
from cpython cimport PyTuple_Check, PyList_Check
2525

@@ -549,7 +549,7 @@ cpdef convert_scalar(ndarray arr, object value):
549549
if arr.descr.type_num == NPY_DATETIME:
550550
if isinstance(value, np.ndarray):
551551
pass
552-
elif isinstance(value, datetime):
552+
elif isinstance(value, (datetime, np.datetime64, date)):
553553
return Timestamp(value).value
554554
elif value is None or value != value:
555555
return iNaT

pandas/core/frame.py

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4029,15 +4029,30 @@ def combine(self, other, func, fill_value=None, overwrite=True):
40294029
----------
40304030
other : DataFrame
40314031
func : function
4032+
Function that takes two series as inputs and returns a Series or a
4033+
scalar
40324034
fill_value : scalar value
40334035
overwrite : boolean, default True
40344036
If True then overwrite values for common keys in the calling frame
40354037
40364038
Returns
40374039
-------
40384040
result : DataFrame
4039-
"""
40404041
4042+
Examples
4043+
--------
4044+
>>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]})
4045+
>>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]})
4046+
>>> df1.combine(df2, lambda s1, s2: s1 if s1.sum() < s2.sum() else s2)
4047+
A B
4048+
0 0 3
4049+
1 0 3
4050+
4051+
See Also
4052+
--------
4053+
DataFrame.combine_first : Combine two DataFrame objects and default to
4054+
non-null values in frame calling the method
4055+
"""
40414056
other_idxlen = len(other.index) # save for compare
40424057

40434058
this, other = self.align(other, copy=False)
@@ -4125,16 +4140,24 @@ def combine_first(self, other):
41254140
----------
41264141
other : DataFrame
41274142
4143+
Returns
4144+
-------
4145+
combined : DataFrame
4146+
41284147
Examples
41294148
--------
4130-
a's values prioritized, use values from b to fill holes:
4131-
4132-
>>> a.combine_first(b)
4149+
df1's values are prioritized; values from df2 are used to fill holes:
41334150
4151+
>>> df1 = pd.DataFrame([[1, np.nan]])
4152+
>>> df2 = pd.DataFrame([[3, 4]])
4153+
>>> df1.combine_first(df2)
4154+
0 1
4155+
0 1 4.0
41344156
4135-
Returns
4136-
-------
4137-
combined : DataFrame
4157+
See Also
4158+
--------
4159+
DataFrame.combine : Perform series-wise operation on two DataFrames
4160+
using a given function
41384161
"""
41394162
import pandas.core.computation.expressions as expressions
41404163

@@ -5782,7 +5805,12 @@ def idxmin(self, axis=0, skipna=True):
57825805
0 or 'index' for row-wise, 1 or 'columns' for column-wise
57835806
skipna : boolean, default True
57845807
Exclude NA/null values. If an entire row/column is NA, the result
5785-
will be NA
5808+
will be NA.
5809+
5810+
Raises
5811+
------
5812+
ValueError
5813+
* If the row/column is empty
57865814
57875815
Returns
57885816
-------
@@ -5813,7 +5841,12 @@ def idxmax(self, axis=0, skipna=True):
58135841
0 or 'index' for row-wise, 1 or 'columns' for column-wise
58145842
skipna : boolean, default True
58155843
Exclude NA/null values. If an entire row/column is NA, the result
5816-
will be first index.
5844+
will be NA.
5845+
5846+
Raises
5847+
------
5848+
ValueError
5849+
* If the row/column is empty
58175850
58185851
Returns
58195852
-------

pandas/core/indexes/multi.py

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# pylint: disable=E1101,E1103,W0232
33
import datetime
44
import warnings
5-
from functools import partial
65
from sys import getsizeof
76

87
import numpy as np
@@ -28,8 +27,7 @@
2827
is_true_slices)
2928

3029
import pandas.core.base as base
31-
from pandas.util._decorators import (Appender, cache_readonly,
32-
deprecate, deprecate_kwarg)
30+
from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg
3331
import pandas.core.common as com
3432
import pandas.core.missing as missing
3533
import pandas.core.algorithms as algos
@@ -201,7 +199,8 @@ def _verify_integrity(self, labels=None, levels=None):
201199
" inconsistent state" % (i, label.max(),
202200
len(level)))
203201

204-
def _get_levels(self):
202+
@property
203+
def levels(self):
205204
return self._levels
206205

207206
def _set_levels(self, levels, level=None, copy=False, validate=True,
@@ -303,14 +302,8 @@ def set_levels(self, levels, level=None, inplace=False,
303302
if not inplace:
304303
return idx
305304

306-
# remove me in 0.14 and change to read only property
307-
__set_levels = deprecate("setting `levels` directly",
308-
partial(set_levels, inplace=True,
309-
verify_integrity=True),
310-
alt_name="set_levels")
311-
levels = property(fget=_get_levels, fset=__set_levels)
312-
313-
def _get_labels(self):
305+
@property
306+
def labels(self):
314307
return self._labels
315308

316309
def _set_labels(self, labels, level=None, copy=False, validate=True,
@@ -403,13 +396,6 @@ def set_labels(self, labels, level=None, inplace=False,
403396
if not inplace:
404397
return idx
405398

406-
# remove me in 0.14 and change to readonly property
407-
__set_labels = deprecate("setting labels directly",
408-
partial(set_labels, inplace=True,
409-
verify_integrity=True),
410-
alt_name="set_labels")
411-
labels = property(fget=_get_labels, fset=__set_labels)
412-
413399
def copy(self, names=None, dtype=None, levels=None, labels=None,
414400
deep=False, _set_identity=False, **kwargs):
415401
"""
@@ -844,9 +830,10 @@ def duplicated(self, keep='first'):
844830

845831
return duplicated_int64(ids, keep)
846832

847-
@Appender(ibase._index_shared_docs['fillna'])
848833
def fillna(self, value=None, downcast=None):
849-
# isna is not implemented for MultiIndex
834+
"""
835+
fillna is not implemented for MultiIndex
836+
"""
850837
raise NotImplementedError('isna is not defined for MultiIndex')
851838

852839
@Appender(_index_shared_docs['dropna'])

pandas/core/series.py

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,7 +1306,13 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs):
13061306
Parameters
13071307
----------
13081308
skipna : boolean, default True
1309-
Exclude NA/null values
1309+
Exclude NA/null values. If the entire Series is NA, the result
1310+
will be NA.
1311+
1312+
Raises
1313+
------
1314+
ValueError
1315+
* If the Series is empty
13101316
13111317
Returns
13121318
-------
@@ -1336,7 +1342,13 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs):
13361342
Parameters
13371343
----------
13381344
skipna : boolean, default True
1339-
Exclude NA/null values
1345+
Exclude NA/null values. If the entire Series is NA, the result
1346+
will be NA.
1347+
1348+
Raises
1349+
------
1350+
ValueError
1351+
* If the Series is empty
13401352
13411353
Returns
13421354
-------
@@ -1361,13 +1373,13 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs):
13611373

13621374
# ndarray compat
13631375
argmin = deprecate('argmin', idxmin,
1364-
msg="'argmin' is deprecated. Use 'idxmin' instead. "
1376+
msg="'argmin' is deprecated, use 'idxmin' instead. "
13651377
"The behavior of 'argmin' will be corrected to "
13661378
"return the positional minimum in the future. "
13671379
"Use 'series.values.argmin' to get the position of "
13681380
"the minimum now.")
13691381
argmax = deprecate('argmax', idxmax,
1370-
msg="'argmax' is deprecated. Use 'idxmax' instead. "
1382+
msg="'argmax' is deprecated, use 'idxmax' instead. "
13711383
"The behavior of 'argmax' will be corrected to "
13721384
"return the positional maximum in the future. "
13731385
"Use 'series.values.argmax' to get the position of "
@@ -1731,11 +1743,26 @@ def combine(self, other, func, fill_value=np.nan):
17311743
----------
17321744
other : Series or scalar value
17331745
func : function
1746+
Function that takes two scalars as inputs and returns a scalar
17341747
fill_value : scalar value
17351748
17361749
Returns
17371750
-------
17381751
result : Series
1752+
1753+
Examples
1754+
--------
1755+
>>> s1 = Series([1, 2])
1756+
>>> s2 = Series([0, 3])
1757+
>>> s1.combine(s2, lambda x1, x2: x1 if x1 < x2 else x2)
1758+
0 0
1759+
1 2
1760+
dtype: int64
1761+
1762+
See Also
1763+
--------
1764+
Series.combine_first : Combine Series values, choosing the calling
1765+
Series's values first
17391766
"""
17401767
if isinstance(other, Series):
17411768
new_index = self.index.union(other.index)
@@ -1764,7 +1791,21 @@ def combine_first(self, other):
17641791
17651792
Returns
17661793
-------
1767-
y : Series
1794+
combined : Series
1795+
1796+
Examples
1797+
--------
1798+
>>> s1 = pd.Series([1, np.nan])
1799+
>>> s2 = pd.Series([3, 4])
1800+
>>> s1.combine_first(s2)
1801+
0 1.0
1802+
1 4.0
1803+
dtype: float64
1804+
1805+
See Also
1806+
--------
1807+
Series.combine : Perform elementwise operation on two Series
1808+
using a given function
17681809
"""
17691810
new_index = self.index.union(other.index)
17701811
this = self.reindex(new_index, copy=False)

pandas/io/parquet.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,10 @@ def write(self, df, path, compression='snappy',
7676
table, path, compression=compression,
7777
coerce_timestamps=coerce_timestamps, **kwargs)
7878

79-
def read(self, path, columns=None):
79+
def read(self, path, columns=None, **kwargs):
8080
path, _, _ = get_filepath_or_buffer(path)
81-
return self.api.parquet.read_table(path, columns=columns).to_pandas()
81+
return self.api.parquet.read_table(path, columns=columns,
82+
**kwargs).to_pandas()
8283

8384

8485
class FastParquetImpl(object):
@@ -115,9 +116,9 @@ def write(self, df, path, compression='snappy', **kwargs):
115116
self.api.write(path, df,
116117
compression=compression, **kwargs)
117118

118-
def read(self, path, columns=None):
119+
def read(self, path, columns=None, **kwargs):
119120
path, _, _ = get_filepath_or_buffer(path)
120-
return self.api.ParquetFile(path).to_pandas(columns=columns)
121+
return self.api.ParquetFile(path).to_pandas(columns=columns, **kwargs)
121122

122123

123124
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
@@ -175,7 +176,7 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
175176
if df.columns.inferred_type not in valid_types:
176177
raise ValueError("parquet must have string column names")
177178

178-
return impl.write(df, path, compression=compression)
179+
return impl.write(df, path, compression=compression, **kwargs)
179180

180181

181182
def read_parquet(path, engine='auto', columns=None, **kwargs):
@@ -205,4 +206,4 @@ def read_parquet(path, engine='auto', columns=None, **kwargs):
205206
"""
206207

207208
impl = get_engine(engine)
208-
return impl.read(path, columns=columns)
209+
return impl.read(path, columns=columns, **kwargs)

pandas/tests/indexes/datetimes/test_partial_slicing.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
import pytest
44

5-
from datetime import datetime
5+
from datetime import datetime, date
66
import numpy as np
77
import pandas as pd
8+
import operator as op
89

910
from pandas import (DatetimeIndex, Series, DataFrame,
1011
date_range, Index, Timedelta, Timestamp)
@@ -330,3 +331,21 @@ def test_loc_datetime_length_one(self):
330331

331332
result = df.loc['2016-10-01T00:00:00':]
332333
tm.assert_frame_equal(result, df)
334+
335+
@pytest.mark.parametrize('datetimelike', [
336+
Timestamp('20130101'), datetime(2013, 1, 1),
337+
date(2013, 1, 1), np.datetime64('2013-01-01T00:00', 'ns')])
338+
@pytest.mark.parametrize('op,expected', [
339+
(op.lt, [True, False, False, False]),
340+
(op.le, [True, True, False, False]),
341+
(op.eq, [False, True, False, False]),
342+
(op.gt, [False, False, False, True])])
343+
def test_selection_by_datetimelike(self, datetimelike, op, expected):
344+
# GH issue #17965, test for ability to compare datetime64[ns] columns
345+
# to datetimelike
346+
df = DataFrame({'A': [pd.Timestamp('20120101'),
347+
pd.Timestamp('20130101'),
348+
np.nan, pd.Timestamp('20130103')]})
349+
result = op(df.A, datetimelike)
350+
expected = Series(expected, name='A')
351+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)