Skip to content

Commit 60426a6

Browse files
committed
Merge origin
2 parents dfa11d2 + 2a381d3 commit 60426a6

File tree

26 files changed

+134
-59
lines changed

26 files changed

+134
-59
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -392,9 +392,9 @@ and users can skip the check by explicitly specifying ``sort=True`` or
392392
``sort=False``.
393393

394394
This deprecation can also impact pandas' internal usage of :func:`concat`.
395-
While we have investigated uses of :func:`concat` to determine if this could lead
396-
to a change in behavior of other functions and methods in the API, it is
397-
possible some have been missed. In order to be cautious here, pandas has *not*
395+
Here, cases where :func:`concat` was sorting a :class:`DatetimeIndex` but not
396+
other indexes are considered bugs and have been fixed as noted below. However,
397+
it is possible some have been missed. In order to be cautious here, pandas has *not*
398398
added ``sort=False`` to any internal calls where we believe behavior should not change.
399399
If we have missed something, users will not experience a behavior change but they
400400
will receive a warning about :func:`concat` even though they are not directly
@@ -431,6 +431,14 @@ we may address any potential behavior changes.
431431
432432
pd.concat([df1, df2], axis=1, sort=False)
433433
434+
The following cases, where pandas' internal usage of :func:`concat` resulted in
435+
inconsistent sorting, are now fixed in this release.
436+
437+
- :meth:`Series.apply` and :meth:`DataFrame.apply` with a list-like or dict-like ``func`` argument.
438+
- :meth:`Series.shift`, :meth:`DataFrame.shift`, :meth:`.SeriesGroupBy.shift`, :meth:`.DataFrameGroupBy.shift` with the ``periods`` argument a list of length greater than 1.
439+
- :meth:`DataFrame.join` with ``other`` a list of one or more Series or DataFrames and ``how="inner"``, ``how="left"``, or ``how="right"``.
440+
- :meth:`Series.str.cat` with ``others`` a Series or DataFrame.
441+
434442
.. _whatsnew_300.api_breaking.value_counts_sorting:
435443

436444
Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False``
@@ -1243,7 +1251,6 @@ Groupby/resample/rolling
12431251
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
12441252
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
12451253
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
1246-
- Bug in :meth:`DataFrameGroupBy.shift` where the resulting index would be sorted if the input is a :class:`DatetimeIndex` and multiple periods are specified (:issue:`62843`)
12471254
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
12481255
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
12491256
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)

pandas/core/apply.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def transform_dict_like(self, func) -> DataFrame:
382382
for name, how in func.items():
383383
colg = obj._gotitem(name, ndim=1)
384384
results[name] = colg.transform(how, 0, *args, **kwargs)
385-
return concat(results, axis=1) # nobug
385+
return concat(results, axis=1)
386386

387387
def transform_str_or_callable(self, func) -> DataFrame | Series:
388388
"""
@@ -485,7 +485,7 @@ def wrap_results_list_like(
485485
obj = self.obj
486486

487487
try:
488-
return concat(results, keys=keys, axis=1, sort=False) # maybebug
488+
return concat(results, keys=keys, axis=1, sort=False)
489489
except TypeError as err:
490490
# we are concatting non-NDFrame objects,
491491
# e.g. a list of scalars
@@ -635,10 +635,11 @@ def wrap_results_dict_like(
635635
keys_to_use = ktu
636636

637637
axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1
638-
result = concat( # maybebug
638+
result = concat(
639639
results,
640640
axis=axis,
641641
keys=keys_to_use,
642+
sort=False,
642643
)
643644
elif any(is_ndframe):
644645
# There is a mix of NDFrames and scalars

pandas/core/arrays/arrow/accessors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,6 @@ def explode(self) -> DataFrame:
496496
from pandas import concat
497497

498498
pa_type = self._pa_array.type
499-
return concat( # nobug
499+
return concat(
500500
[self.field(i) for i in range(pa_type.num_fields)], axis="columns"
501501
)

pandas/core/arrays/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2686,7 +2686,7 @@ def describe(self) -> DataFrame:
26862686
from pandas import Index
26872687
from pandas.core.reshape.concat import concat
26882688

2689-
result = concat([counts, freqs], ignore_index=True, axis=1) # nobug
2689+
result = concat([counts, freqs], ignore_index=True, axis=1)
26902690
result.columns = Index(["counts", "freqs"])
26912691
result.index.name = "categories"
26922692

pandas/core/frame.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6137,7 +6137,7 @@ def shift(
61376137
.shift(periods=period, freq=freq, axis=axis, fill_value=fill_value)
61386138
.add_suffix(f"{suffix}_{period}" if suffix else f"_{period}")
61396139
)
6140-
return concat(shifted_dataframes, axis=1) # bug
6140+
return concat(shifted_dataframes, axis=1, sort=False)
61416141
elif suffix:
61426142
raise ValueError("Cannot specify `suffix` if `periods` is an int.")
61436143
periods = cast(int, periods)
@@ -11166,7 +11166,7 @@ def _append_internal(
1116611166

1116711167
from pandas.core.reshape.concat import concat
1116811168

11169-
result = concat( # possible bug
11169+
result = concat(
1117011170
[self, row_df],
1117111171
ignore_index=ignore_index,
1117211172
)
@@ -11394,7 +11394,7 @@ def join(
1139411394
# join indexes only using concat
1139511395
if can_concat:
1139611396
if how == "left" or how == "right":
11397-
res = concat( # nobug
11397+
res = concat(
1139811398
frames, axis=1, join="outer", verify_integrity=True, sort=sort
1139911399
)
1140011400
index = self.index if how == "left" else frames[-1].index
@@ -11405,7 +11405,7 @@ def join(
1140511405
else:
1140611406
if how == "outer":
1140711407
sort = True
11408-
return concat( # bug
11408+
return concat(
1140911409
frames, axis=1, join=how, verify_integrity=True, sort=sort
1141011410
)
1141111411

@@ -11595,9 +11595,7 @@ def _series_round(ser: Series, decimals: int) -> Series:
1159511595

1159611596
if new_cols is not None and len(new_cols) > 0:
1159711597
return self._constructor(
11598-
concat(new_cols, axis=1),
11599-
index=self.index,
11600-
columns=self.columns, # nobug
11598+
concat(new_cols, axis=1), index=self.index, columns=self.columns
1160111599
).__finalize__(self, method="round")
1160211600
else:
1160311601
return self.copy(deep=False)
@@ -14181,7 +14179,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
1418114179
from pandas.core.reshape.concat import concat
1418214180

1418314181
values = collections.defaultdict(list, values)
14184-
result = concat( # nobug
14182+
result = concat(
1418514183
(
1418614184
self.iloc[:, [i]].isin(values[col])
1418714185
for i, col in enumerate(self.columns)

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6512,7 +6512,7 @@ def astype(
65126512
return self.copy(deep=False)
65136513

65146514
# GH 19920: retain column metadata after concat
6515-
result = concat(results, axis=1) # nobug
6515+
result = concat(results, axis=1)
65166516
# GH#40810 retain subclass
65176517
# error: Incompatible types in assignment
65186518
# (expression has type "Self", variable has type "DataFrame")
@@ -9494,7 +9494,7 @@ def compare(
94949494

94959495
# error: List item 0 has incompatible type "NDFrame"; expected
94969496
# "Union[Series, DataFrame]"
9497-
diff = concat( # bug
9497+
diff = concat(
94989498
[self, other], # type: ignore[list-item]
94999499
axis=axis,
95009500
keys=result_names,

pandas/core/groupby/generic.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
582582
if any(isinstance(x, DataFrame) for x in results.values()):
583583
from pandas import concat
584584

585-
res_df = concat( # nobug
585+
res_df = concat(
586586
results.values(), axis=1, keys=[key.label for key in results]
587587
)
588588
return res_df
@@ -753,7 +753,7 @@ def _transform_general(
753753
if results:
754754
from pandas.core.reshape.concat import concat
755755

756-
concatenated = concat(results, ignore_index=True) # nobug
756+
concatenated = concat(results, ignore_index=True)
757757
result = self._set_result_index_ordered(concatenated)
758758
else:
759759
result = self.obj._constructor(dtype=np.float64)
@@ -2263,7 +2263,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):
22632263
applied.append(res)
22642264

22652265
concat_index = obj.columns
2266-
concatenated = concat( # nobug
2266+
concatenated = concat(
22672267
applied, axis=0, verify_integrity=False, ignore_index=True
22682268
)
22692269
concatenated = concatenated.reindex(concat_index, axis=1)
@@ -2553,7 +2553,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
25532553
# concat would raise
25542554
res_df = DataFrame([], columns=columns, index=self._grouper.result_index)
25552555
else:
2556-
res_df = concat(results, keys=columns, axis=1) # nobug
2556+
res_df = concat(results, keys=columns, axis=1)
25572557

25582558
if not self.as_index:
25592559
res_df.index = default_index(len(res_df))
@@ -3413,9 +3413,7 @@ def _wrap_transform_general_frame(
34133413
# other dimension; this will preserve dtypes
34143414
# GH14457
34153415
if res.index.is_(obj.index):
3416-
res_frame = concat(
3417-
[res] * len(group.columns), axis=1, ignore_index=True
3418-
) # nobug
3416+
res_frame = concat([res] * len(group.columns), axis=1, ignore_index=True)
34193417
res_frame.columns = group.columns
34203418
res_frame.index = group.index
34213419
else:

pandas/core/groupby/groupby.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,7 +1141,7 @@ def _concat_objects(
11411141
group_levels = self._grouper.levels
11421142
group_names = self._grouper.names
11431143

1144-
result = concat( # maybebug
1144+
result = concat(
11451145
values,
11461146
axis=0,
11471147
keys=group_keys,
@@ -1150,10 +1150,10 @@ def _concat_objects(
11501150
sort=False,
11511151
)
11521152
else:
1153-
result = concat(values, axis=0) # maybebug
1153+
result = concat(values, axis=0)
11541154

11551155
elif not not_indexed_same:
1156-
result = concat(values, axis=0) # maybebug
1156+
result = concat(values, axis=0)
11571157

11581158
ax = self._selected_obj.index
11591159
if self.dropna:
@@ -1176,7 +1176,7 @@ def _concat_objects(
11761176
result = result.reindex(ax, axis=0)
11771177

11781178
else:
1179-
result = concat(values, axis=0) # maybebug
1179+
result = concat(values, axis=0)
11801180

11811181
if self.obj.ndim == 1:
11821182
name = self.obj.name
@@ -5231,7 +5231,7 @@ def shift(
52315231
return (
52325232
shifted_dataframes[0]
52335233
if len(shifted_dataframes) == 1
5234-
else concat(shifted_dataframes, axis=1, sort=False) # nobug
5234+
else concat(shifted_dataframes, axis=1, sort=False)
52355235
)
52365236

52375237
@final

pandas/core/indexes/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5384,9 +5384,9 @@ def append(self, other: Index | Sequence[Index]) -> Index:
53845384
names = {obj.name for obj in to_concat}
53855385
name = None if len(names) > 1 else self.name
53865386

5387-
return self._concat(to_concat, name) # nobug
5387+
return self._concat(to_concat, name)
53885388

5389-
def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # nobug
5389+
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
53905390
"""
53915391
Concatenate multiple Index objects.
53925392
"""

pandas/core/indexes/range.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,7 +1181,7 @@ def insert(self, loc: int, item) -> Index:
11811181

11821182
return super().insert(loc, item)
11831183

1184-
def _concat(self, indexes: list[Index], name: Hashable) -> Index: # nobug
1184+
def _concat(self, indexes: list[Index], name: Hashable) -> Index:
11851185
"""
11861186
Overriding parent method for the case of all RangeIndex instances.
11871187
@@ -1191,7 +1191,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index: # nobug
11911191
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64')
11921192
"""
11931193
if not all(isinstance(x, RangeIndex) for x in indexes):
1194-
result = super()._concat(indexes, name) # nobug
1194+
result = super()._concat(indexes, name)
11951195
if result.dtype.kind == "i":
11961196
return self._shallow_copy(result._values)
11971197
return result

0 commit comments

Comments
 (0)