1313)
1414from pandas .tests .strings import (
1515 _convert_na_value ,
16- object_pyarrow_numpy ,
16+ is_object_or_nan_string_dtype ,
1717)
1818
1919# --------------------------------------------------------------------------------------
@@ -33,7 +33,9 @@ def test_contains(any_string_dtype):
3333 pat = "mmm[_]+"
3434
3535 result = values .str .contains (pat )
36- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
36+ expected_dtype = (
37+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
38+ )
3739 expected = Series (
3840 np .array ([False , np .nan , True , True , False ], dtype = np .object_ ),
3941 dtype = expected_dtype ,
@@ -52,7 +54,9 @@ def test_contains(any_string_dtype):
5254 dtype = any_string_dtype ,
5355 )
5456 result = values .str .contains (pat )
55- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
57+ expected_dtype = (
58+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
59+ )
5660 expected = Series (np .array ([False , False , True , True ]), dtype = expected_dtype )
5761 tm .assert_series_equal (result , expected )
5862
@@ -79,14 +83,18 @@ def test_contains(any_string_dtype):
7983 pat = "mmm[_]+"
8084
8185 result = values .str .contains (pat )
82- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
86+ expected_dtype = (
87+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
88+ )
8389 expected = Series (
8490 np .array ([False , np .nan , True , True ], dtype = np .object_ ), dtype = expected_dtype
8591 )
8692 tm .assert_series_equal (result , expected )
8793
8894 result = values .str .contains (pat , na = False )
89- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
95+ expected_dtype = (
96+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
97+ )
9098 expected = Series (np .array ([False , False , True , True ]), dtype = expected_dtype )
9199 tm .assert_series_equal (result , expected )
92100
@@ -171,7 +179,9 @@ def test_contains_moar(any_string_dtype):
171179 )
172180
173181 result = s .str .contains ("a" )
174- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
182+ expected_dtype = (
183+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
184+ )
175185 expected = Series (
176186 [False , False , False , True , True , False , np .nan , False , False , True ],
177187 dtype = expected_dtype ,
@@ -212,7 +222,9 @@ def test_contains_nan(any_string_dtype):
212222 s = Series ([np .nan , np .nan , np .nan ], dtype = any_string_dtype )
213223
214224 result = s .str .contains ("foo" , na = False )
215- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
225+ expected_dtype = (
226+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
227+ )
216228 expected = Series ([False , False , False ], dtype = expected_dtype )
217229 tm .assert_series_equal (result , expected )
218230
@@ -230,7 +242,9 @@ def test_contains_nan(any_string_dtype):
230242 tm .assert_series_equal (result , expected )
231243
232244 result = s .str .contains ("foo" )
233- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
245+ expected_dtype = (
246+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
247+ )
234248 expected = Series ([np .nan , np .nan , np .nan ], dtype = expected_dtype )
235249 tm .assert_series_equal (result , expected )
236250
@@ -675,7 +689,9 @@ def test_replace_regex_single_character(regex, any_string_dtype):
675689
676690def test_match (any_string_dtype ):
677691 # New match behavior introduced in 0.13
678- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
692+ expected_dtype = (
693+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
694+ )
679695
680696 values = Series (["fooBAD__barBAD" , np .nan , "foo" ], dtype = any_string_dtype )
681697 result = values .str .match (".*(BAD[_]+).*(BAD)" )
@@ -730,20 +746,26 @@ def test_match_na_kwarg(any_string_dtype):
730746 s = Series (["a" , "b" , np .nan ], dtype = any_string_dtype )
731747
732748 result = s .str .match ("a" , na = False )
733- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
749+ expected_dtype = (
750+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
751+ )
734752 expected = Series ([True , False , False ], dtype = expected_dtype )
735753 tm .assert_series_equal (result , expected )
736754
737755 result = s .str .match ("a" )
738- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
756+ expected_dtype = (
757+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
758+ )
739759 expected = Series ([True , False , np .nan ], dtype = expected_dtype )
740760 tm .assert_series_equal (result , expected )
741761
742762
743763def test_match_case_kwarg (any_string_dtype ):
744764 values = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
745765 result = values .str .match ("ab" , case = False )
746- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
766+ expected_dtype = (
767+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
768+ )
747769 expected = Series ([True , True , True , True ], dtype = expected_dtype )
748770 tm .assert_series_equal (result , expected )
749771
@@ -759,7 +781,9 @@ def test_fullmatch(any_string_dtype):
759781 ["fooBAD__barBAD" , "BAD_BADleroybrown" , np .nan , "foo" ], dtype = any_string_dtype
760782 )
761783 result = ser .str .fullmatch (".*BAD[_]+.*BAD" )
762- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
784+ expected_dtype = (
785+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
786+ )
763787 expected = Series ([True , False , np .nan , False ], dtype = expected_dtype )
764788 tm .assert_series_equal (result , expected )
765789
@@ -768,7 +792,9 @@ def test_fullmatch_dollar_literal(any_string_dtype):
768792 # GH 56652
769793 ser = Series (["foo" , "foo$foo" , np .nan , "foo$" ], dtype = any_string_dtype )
770794 result = ser .str .fullmatch ("foo\\ $" )
771- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
795+ expected_dtype = (
796+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
797+ )
772798 expected = Series ([False , False , np .nan , True ], dtype = expected_dtype )
773799 tm .assert_series_equal (result , expected )
774800
@@ -778,14 +804,18 @@ def test_fullmatch_na_kwarg(any_string_dtype):
778804 ["fooBAD__barBAD" , "BAD_BADleroybrown" , np .nan , "foo" ], dtype = any_string_dtype
779805 )
780806 result = ser .str .fullmatch (".*BAD[_]+.*BAD" , na = False )
781- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
807+ expected_dtype = (
808+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
809+ )
782810 expected = Series ([True , False , False , False ], dtype = expected_dtype )
783811 tm .assert_series_equal (result , expected )
784812
785813
786814def test_fullmatch_case_kwarg (any_string_dtype , performance_warning ):
787815 ser = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
788- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
816+ expected_dtype = (
817+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
818+ )
789819
790820 expected = Series ([True , False , False , False ], dtype = expected_dtype )
791821
@@ -859,7 +889,9 @@ def test_find(any_string_dtype):
859889 ser = Series (
860890 ["ABCDEFG" , "BCDEFEF" , "DEFGHIJEF" , "EFGHEF" , "XXXX" ], dtype = any_string_dtype
861891 )
862- expected_dtype = np .int64 if any_string_dtype in object_pyarrow_numpy else "Int64"
892+ expected_dtype = (
893+ np .int64 if is_object_or_nan_string_dtype (any_string_dtype ) else "Int64"
894+ )
863895
864896 result = ser .str .find ("EF" )
865897 expected = Series ([4 , 3 , 1 , 0 , - 1 ], dtype = expected_dtype )
@@ -911,7 +943,9 @@ def test_find_nan(any_string_dtype):
911943 ser = Series (
912944 ["ABCDEFG" , np .nan , "DEFGHIJEF" , np .nan , "XXXX" ], dtype = any_string_dtype
913945 )
914- expected_dtype = np .float64 if any_string_dtype in object_pyarrow_numpy else "Int64"
946+ expected_dtype = (
947+ np .float64 if is_object_or_nan_string_dtype (any_string_dtype ) else "Int64"
948+ )
915949
916950 result = ser .str .find ("EF" )
917951 expected = Series ([4 , np .nan , 1 , np .nan , - 1 ], dtype = expected_dtype )
0 commit comments