Skip to content
6 changes: 6 additions & 0 deletions .github/actions/setup-conda/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,9 @@ runs:
condarc-file: ci/.condarc
cache-environment: true
cache-downloads: true

- name: Uninstall pyarrow
if: ${{ env.REMOVE_PYARROW == '1' }}
run: |
micromamba remove -y pyarrow
shell: bash -el {0}
7 changes: 6 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml]
# Prevent the include jobs from overriding other jobs
pattern: [""]
pandas_future_infer_string: ["0"]
include:
- name: "Downstream Compat"
env_file: actions-311-downstream_compat.yaml
Expand Down Expand Up @@ -58,6 +59,9 @@ jobs:
# It will be temporarily activated during tests with locale.setlocale
extra_loc: "zh_CN"
- name: "Future infer strings"
env_file: actions-312.yaml
pandas_future_infer_string: "1"
- name: "Future infer strings (without pyarrow)"
env_file: actions-311.yaml
pandas_future_infer_string: "1"
- name: "Pypy"
Expand Down Expand Up @@ -85,9 +89,10 @@ jobs:
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
# Clipboard tests
QT_QPA_PLATFORM: offscreen
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}
cancel-in-progress: true

services:
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
Expand Down Expand Up @@ -1245,6 +1247,9 @@ def test_agg_multiple_mixed():
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_agg_multiple_mixed_raises():
# GH 20909
mdf = DataFrame(
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW
from pandas.errors import SpecificationError

from pandas import (
Expand Down Expand Up @@ -209,6 +212,10 @@ def transform(row):
data.apply(transform, axis=1)


# we should raise a proper TypeError instead of propagating the pyarrow error
@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
@pytest.mark.parametrize(
"df, func, expected",
tm.get_cython_table_params(
Expand All @@ -229,6 +236,10 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_str
df.agg(func, axis=axis)


# we should raise a proper TypeError instead of propagating the pyarrow error
@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
@pytest.mark.parametrize(
"series, func, expected",
chain(
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/apply/test_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def test_numba_nonunique_unsupported(apply_axis):


def test_numba_unsupported_dtypes(apply_axis):
pytest.importorskip("pyarrow")
f = lambda x: x
df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
df["c"] = df["c"].astype("double[pyarrow]")
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/arithmetic/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -315,6 +318,9 @@ def test_add(self):
expected = pd.Index(["1a", "1b", "1c"])
tm.assert_index_equal("1" + index, expected)

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_sub_fail(self, using_infer_string):
index = pd.Index([str(i) for i in range(10)])

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -90,6 +94,9 @@ def test_op_int8(left_array, right_array, opname):
# -----------------------------------------------------------------------------


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
# invalid ops

Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@

from pandas._config import using_string_dtype

from pandas.compat import PYPY
from pandas.compat import (
HAS_PYARROW,
PYPY,
)

from pandas import (
Categorical,
Expand Down Expand Up @@ -296,7 +299,9 @@ def test_nbytes(self):
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
assert cat.nbytes == exp

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
)
def test_memory_usage(self):
cat = Categorical([1, 2, 3])

Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -442,7 +444,9 @@ def test_constructor_str_unknown(self):
with pytest.raises(ValueError, match="Unknown dtype"):
Categorical([1, 2], dtype="foo")

@pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings")
@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings"
)
def test_constructor_np_strs(self):
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/arrays/integer/test_reduction.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.compat import HAS_PYARROW

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -102,9 +104,10 @@ def test_groupby_reductions(op, expected):
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
],
)
def test_mixed_reductions(op, expected, using_infer_string):
if op in ["any", "all"] and using_infer_string:
expected = expected.astype("bool")
def test_mixed_reductions(request, op, expected, using_infer_string):
if op in ["any", "all"] and using_infer_string and HAS_PYARROW:
# TODO(infer_string) inconsistent result type
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
df = DataFrame(
{
"A": ["a", "b", "b"],
Expand Down
12 changes: 11 additions & 1 deletion pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.dtypes import DatetimeTZDtype

import pandas as pd
Expand All @@ -20,6 +24,7 @@
SparseArray,
TimedeltaArray,
)
from pandas.core.arrays.string_ import StringArrayNumpySemantics
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics


Expand Down Expand Up @@ -218,7 +223,9 @@ def test_iter_box_period(self):
)
def test_values_consistent(arr, expected_type, dtype, using_infer_string):
if using_infer_string and dtype == "object":
expected_type = ArrowStringArrayNumpySemantics
expected_type = (
ArrowStringArrayNumpySemantics if HAS_PYARROW else StringArrayNumpySemantics
)
l_values = Series(arr)._values
r_values = pd.Index(arr)._values
assert type(l_values) is expected_type
Expand Down Expand Up @@ -355,6 +362,9 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
tm.assert_numpy_array_equal(result, expected)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize(
"arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/copy_view/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW
from pandas.compat.pyarrow import pa_version_under12p0
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -197,7 +198,7 @@ def test_astype_arrow_timestamp():
assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_convert_dtypes_infer_objects():
ser = Series(["a", "b", "c"])
ser_orig = ser.copy()
Expand All @@ -213,7 +214,7 @@ def test_convert_dtypes_infer_objects():
tm.assert_series_equal(ser, ser_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_convert_dtypes():
df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]})
df_orig = df.copy()
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/copy_view/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas import (
DataFrame,
Index,
Expand All @@ -14,7 +16,7 @@
from pandas.tests.copy_view.util import get_array


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_concat_frames():
df = DataFrame({"b": ["a"] * 3})
df2 = DataFrame({"a": ["a"] * 3})
Expand All @@ -33,7 +35,7 @@ def test_concat_frames():
tm.assert_frame_equal(df, df_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_concat_frames_updating_input():
df = DataFrame({"b": ["a"] * 3})
df2 = DataFrame({"a": ["a"] * 3})
Expand Down Expand Up @@ -153,7 +155,7 @@ def test_concat_copy_keyword():
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
@pytest.mark.parametrize(
"func",
[
Expand Down Expand Up @@ -249,7 +251,7 @@ def test_merge_copy_keyword():
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_join_on_key():
df_index = Index(["a", "b", "c"], name="key")

Expand Down Expand Up @@ -277,7 +279,7 @@ def test_join_on_key():
tm.assert_frame_equal(df2, df2_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_join_multiple_dataframes_on_key():
df_index = Index(["a", "b", "c"], name="key")

Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas import (
NA,
DataFrame,
Expand Down Expand Up @@ -121,7 +123,7 @@ def test_interpolate_cannot_with_object_dtype():
df.interpolate()


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_interpolate_object_convert_no_op():
df = DataFrame({"a": ["a", "b", "c"], "b": 1})
arr_a = get_array(df, "a")
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -714,7 +716,7 @@ def test_head_tail(method):
tm.assert_frame_equal(df, df_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_infer_objects():
df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"})
df_orig = df.copy()
Expand All @@ -730,6 +732,9 @@ def test_infer_objects():
tm.assert_frame_equal(df, df_orig)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_infer_objects_no_reference():
df = DataFrame(
{
Expand Down Expand Up @@ -899,7 +904,7 @@ def test_sort_values_inplace(obj, kwargs):
tm.assert_equal(view, obj_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
@pytest.mark.parametrize("decimals", [-1, 0, 1])
def test_round(decimals):
df = DataFrame({"a": [1, 2], "b": "c"})
Expand Down
Loading