2222 marks = [
2323 td .skip_if_no ("xlrd" ),
2424 pytest .mark .filterwarnings ("ignore:.*(tree\\ .iter|html argument)" ),
25+ pytest .mark .filterwarnings (
26+ 'ignore:The Excel reader engine "xlrd" is deprecated,'
27+ ),
2528 ],
2629 ),
2730 pytest .param (
3437 pytest .param (
3538 None ,
3639 marks = [
37- td .skip_if_no ("xlrd " ),
38- pytest .mark .filterwarnings ("ignore:.*(tree \\ .iter| html argument) " ),
40+ td .skip_if_no ("openpyxl " ),
41+ pytest .mark .filterwarnings ("ignore:.*html argument" ),
3942 ],
4043 ),
4144 pytest .param ("pyxlsb" , marks = td .skip_if_no ("pyxlsb" )),
@@ -51,6 +54,8 @@ def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool:
5154 engine = engine .values [0 ]
5255 if engine == "openpyxl" and read_ext == ".xls" :
5356 return False
57+ if engine is None and read_ext == ".xls" :
58+ return False
5459 if engine == "odf" and read_ext != ".ods" :
5560 return False
5661 if read_ext == ".ods" and engine != "odf" :
@@ -559,7 +564,7 @@ def test_date_conversion_overflow(self, read_ext):
559564 columns = ["DateColWithBigInt" , "StringCol" ],
560565 )
561566
562- if pd .read_excel .keywords ["engine" ] == "openpyxl" :
567+ if pd .read_excel .keywords ["engine" ] in [ "openpyxl" , None ] :
563568 pytest .xfail ("Maybe not supported by openpyxl" )
564569
565570 result = pd .read_excel ("testdateoverflow" + read_ext )
@@ -942,7 +947,10 @@ def test_read_excel_squeeze(self, read_ext):
942947 expected = pd .Series ([1 , 2 , 3 ], name = "a" )
943948 tm .assert_series_equal (actual , expected )
944949
945- def test_deprecated_kwargs (self , read_ext ):
950+ def test_deprecated_kwargs (self , engine , read_ext ):
951+ if engine == "xlrd" :
952+ pytest .skip ("Use of xlrd engine produces a FutureWarning as well" )
953+
946954 with tm .assert_produces_warning (FutureWarning , raise_on_extra_warnings = False ):
947955 pd .read_excel ("test1" + read_ext , "Sheet1" , 0 )
948956
@@ -961,6 +969,19 @@ def test_no_header_with_list_index_col(self, read_ext):
961969 )
962970 tm .assert_frame_equal (expected , result )
963971
972+ def test_excel_high_surrogate (self , engine , read_ext ):
973+ # GH 23809
974+ if read_ext != ".xlsx" :
975+ pytest .skip ("Test is only applicable to .xlsx file" )
976+ if engine in ["openpyxl" , None ]:
977+ pytest .skip ("Test does not work for openpyxl" )
978+
979+ expected = pd .DataFrame (["\udc88 " ], columns = ["Column1" ])
980+
981+ # should not produce a segmentation violation
982+ actual = pd .read_excel ("high_surrogate.xlsx" )
983+ tm .assert_frame_equal (expected , actual )
984+
964985
965986class TestExcelFileRead :
966987 @pytest .fixture (autouse = True )
@@ -1116,14 +1137,6 @@ def test_excel_read_binary(self, engine, read_ext):
11161137 actual = pd .read_excel (data , engine = engine )
11171138 tm .assert_frame_equal (expected , actual )
11181139
1119- def test_excel_high_surrogate (self , engine ):
1120- # GH 23809
1121- expected = pd .DataFrame (["\udc88 " ], columns = ["Column1" ])
1122-
1123- # should not produce a segmentation violation
1124- actual = pd .read_excel ("high_surrogate.xlsx" )
1125- tm .assert_frame_equal (expected , actual )
1126-
11271140 @pytest .mark .parametrize ("filename" , ["df_empty.xlsx" , "df_equals.xlsx" ])
11281141 def test_header_with_index_col (self , engine , filename ):
11291142 # GH 33476
0 commit comments