pandas-dev
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
Lines changed: 17 additions & 0 deletions
diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst
Lines changed: 2 additions & 2 deletions
diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/__init__.py b/pandas/__init__.py
Lines changed: 2 additions & 1 deletion
diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi
Lines changed: 7 additions & 1 deletion
@@ -328,6 +328,23 @@ def time_i8merge(self, how):
  merge(self.left, self.right, how=how)
 
 
+# Benchmark for an inner merge of two single-column frames in which the
+# right-hand key column "a" is built to contain no repeated values.
+class UniqueMerge:
+ # Each value is passed to setup() as the exclusive upper bound of the
+ # randomly drawn integer keys.
+ params = [4_000_000, 1_000_000]
+ param_names = ["unique_elements"]
+
+ def setup(self, unique_elements):
+ # Number of rows in both frames.
+ N = 1_000_000
+ # Random integer keys in [1, unique_elements); repeats may occur.
+ self.left = DataFrame({"a": np.random.randint(1, unique_elements, (N,))})
+ self.right = DataFrame({"a": np.random.randint(1, unique_elements, (N,))})
+ # Keep one occurrence of every key drawn for the right frame.
+ uniques = self.right.a.drop_duplicates()
+ # Pad the de-duplicated keys back up to N rows with 0, -1, -2, ...
+ # The drawn keys are all >= 1, so the filler cannot collide with them
+ # and the resulting column holds N distinct values.
+ self.right["a"] = concat(
+ [uniques, Series(np.arange(0, -(N - len(uniques)), -1))], ignore_index=True
+ )
+
+ def time_unique_merge(self, unique_elements):
+ # Timed operation: inner merge of the two frames; no ``on`` is passed.
+ merge(self.left, self.right, how="inner")
+
+
 class MergeDatetime:
  params = [
  [
 
@@ -940,7 +940,7 @@ Finally, docstrings can also be appended to with the ``doc`` decorator.
 
 In this example, we'll create a parent docstring normally (this is like
 ``pandas.core.generic.NDFrame``). Then we'll have two children (like
-``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll
+``pandas.core.series.Series`` and ``pandas.DataFrame``). We'll
 substitute the class names in this docstring.
 
 .. code-block:: python
 
@@ -151,15 +151,15 @@ and then run::
  git bisect start
  git bisect good v1.4.0
  git bisect bad v1.5.0
- git bisect run bash -c "python setup.py build_ext -j 4; python t.py"
+ git bisect run bash -c "python -m pip install -ve . --no-build-isolation --config-settings editable-verbose=true; python t.py"
 
 This finds the first commit that changed the behavior. The C extensions have to be
 rebuilt at every step, so the search can take a while.
 
 Exit bisect and rebuild the current version::
 
  git bisect reset
- python setup.py build_ext -j 4
+ python -m pip install -ve . --no-build-isolation --config-settings editable-verbose=true
 
 Report your findings under the corresponding issue and ping the commit author to get
 their input.
 
@@ -453,7 +453,7 @@ by evaluate arithmetic and boolean expression all at once for large :class:`~pan
  :func:`~pandas.eval` is many orders of magnitude slower for
  smaller expressions or objects than plain Python. A good rule of thumb is
  to only use :func:`~pandas.eval` when you have a
- :class:`.DataFrame` with more than 10,000 rows.
+ :class:`~pandas.core.frame.DataFrame` with more than 10,000 rows.
 
 Supported syntax
 ~~~~~~~~~~~~~~~~
 
@@ -6400,7 +6400,7 @@ ignored.
  In [2]: df = pd.DataFrame({'A': np.random.randn(sz), 'B': [1] * sz})
 
  In [3]: df.info()
- <class 'pandas.core.frame.DataFrame'>
+ <class 'pandas.DataFrame'>
  RangeIndex: 1000000 entries, 0 to 999999
  Data columns (total 2 columns):
  A 1000000 non-null float64
 
@@ -840,7 +840,7 @@ then all the columns are dummy-encoded, and a :class:`SparseDataFrame` was retur
  In [2]: df = pd.DataFrame({"A": [1, 2], "B": ['a', 'b'], "C": ['a', 'a']})
 
  In [3]: type(pd.get_dummies(df, sparse=True))
- Out[3]: pandas.core.frame.DataFrame
+ Out[3]: pandas.DataFrame
 
  In [4]: type(pd.get_dummies(df[['B', 'C']], sparse=True))
  Out[4]: pandas.core.sparse.frame.SparseDataFrame
 
@@ -414,7 +414,7 @@ Extended verbose info output for :class:`~pandas.DataFrame`
  ... "text_col": ["a", "b", "c"],
  ... "float_col": [0.0, 0.1, 0.2]})
  In [2]: df.info(verbose=True)
- <class 'pandas.core.frame.DataFrame'>
+ <class 'pandas.DataFrame'>
  RangeIndex: 3 entries, 0 to 2
  Data columns (total 3 columns):
  int_col 3 non-null int64
 
@@ -211,6 +211,7 @@ Removal of prior version deprecations/changes
 - Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`)
 - Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`)
 - Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`)
+- Enforced deprecation of values "pad", "ffill", "bfill", and "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`57869`)
 - Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
 - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
 - Iterating over a :class:`.DataFrameGroupBy` or :class:`.SeriesGroupBy` will return tuples of length 1 for the groups when grouping by ``level`` a list of length 1 (:issue:`50064`)
@@ -256,6 +257,7 @@ Removal of prior version deprecations/changes
 - Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`)
 - Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`)
 - Removed the :class:`Grouper` attributes ``ax``, ``groups``, ``indexer``, and ``obj`` (:issue:`51206`, :issue:`51182`)
+- Removed deprecated keyword ``verbose`` on :func:`read_csv` and :func:`read_table` (:issue:`56556`)
 - Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
 
 .. ---------------------------------------------------------------------------
@@ -284,6 +286,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
+- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
 - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
 - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`)
 
@@ -28,7 +28,8 @@
  raise ImportError(
  f"C extension: {_module} not built. If you want to import "
  "pandas from the source directory, you may need to run "
- "'python setup.py build_ext' to build the C extensions first."
+ "'python -m pip install -ve . --no-build-isolation --config-settings "
+ "editable-verbose=true' to build the C extensions first."
  ) from _err
 
 from pandas._config import (
 
@@ -16,7 +16,7 @@ def unique_label_indices(
 class Factorizer:
  count: int
  uniques: Any
- def __init__(self, size_hint: int) -> None: ...
+ def __init__(self, size_hint: int, uses_mask: bool = False) -> None: ...
  def get_count(self) -> int: ...
  def factorize(
  self,
@@ -25,6 +25,9 @@ class Factorizer:
  na_value=...,
  mask=...,
  ) -> npt.NDArray[np.intp]: ...
+ def hash_inner_join(
+ self, values: np.ndarray, mask=...
+ ) -> tuple[np.ndarray, np.ndarray]: ...
 
 class ObjectFactorizer(Factorizer):
  table: PyObjectHashTable
@@ -216,6 +219,9 @@ class HashTable:
  mask=...,
  ignore_na: bool = True,
  ) -> tuple[np.ndarray, npt.NDArray[np.intp]]: ... # np.ndarray[subclass-specific]
+ def hash_inner_join(
+ self, values: np.ndarray, mask=...
+ ) -> tuple[np.ndarray, np.ndarray]: ...
 
 class Complex128HashTable(HashTable): ...
 class Complex64HashTable(HashTable): ...