pandas-dev
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
Lines changed: 1 addition & 0 deletions
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
Lines changed: 14 additions & 4 deletions
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
Lines changed: 6 additions & 1 deletion
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
Lines changed: 25 additions & 12 deletions
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
Lines changed: 16 additions & 2 deletions
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
Lines changed: 27 additions & 1 deletion
@@ -143,6 +143,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
 Deprecations
 ~~~~~~~~~~~~
 - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
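+- The default engine of :func:`read_excel` is now ``openpyxl`` instead of ``xlrd``; ``xlrd`` is still selected automatically for paths ending in ``.xls``, and passing ``engine="xlrd"`` explicitly emits a ``FutureWarning`` because that engine is deprecated (:issue:`28547`)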
 -
 -
 
 
@@ -4,6 +4,7 @@
 import os
 from textwrap import fill
 from typing import Any, Mapping, Union
+import warnings
 
 from pandas._config import config
 
@@ -825,8 +826,7 @@ def _is_ods_stream(stream: Union[BufferedIOBase, RawIOBase]) -> bool:
 class ExcelFile:
  """
  Class for parsing tabular excel sheets into DataFrame objects.
-
- Uses xlrd engine by default. See read_excel for more documentation
+ Uses xlrd, openpyxl or odf. See read_excel for more documentation
 
  Parameters
  ----------
@@ -837,7 +837,7 @@ class ExcelFile:
  engine : str, default None
  If io is not a buffer or path, this must be set to identify io.
  Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
- default ``xlrd``.
+ default ``openpyxl``, ``xlrd`` for .xls files, ``odf`` for .ods files.
  Engine compatibility :
  - ``xlrd`` supports most old/new Excel file formats.
  - ``openpyxl`` supports newer Excel file formats.
@@ -861,14 +861,24 @@ def __init__(
  self, path_or_buffer, engine=None, storage_options: StorageOptions = None
  ):
  if engine is None:
- engine = "xlrd"
+ engine = "openpyxl"
  if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
  if _is_ods_stream(path_or_buffer):
  engine = "odf"
  else:
  ext = os.path.splitext(str(path_or_buffer))[-1]
  if ext == ".ods":
  engine = "odf"
+ elif ext == ".xls":
+ engine = "xlrd"
+
+ elif engine == "xlrd":
+ warnings.warn(
+ 'The Excel reader engine "xlrd" is deprecated, use "openpyxl" instead. '
+ 'Specify engine="openpyxl" to suppress this warning.',
+ FutureWarning,
+ stacklevel=2,
+ )
  if engine not in self._engines:
  raise ValueError(f"Unknown engine: {engine}")
 
 
@@ -1,3 +1,4 @@
+from datetime import datetime
 from typing import List
 
 import numpy as np
@@ -517,7 +518,11 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
 
  # TODO: replace with openpyxl constants
  if cell.is_date:
- return cell.value
+ try:
+ # workaround for inaccurate timestamp notation in excel
+ return datetime.fromtimestamp(round(cell.value.timestamp()))
+ except (AttributeError, OSError):
+ return cell.value
  elif cell.data_type == "e":
  return np.nan
  elif cell.data_type == "b":
 
@@ -22,6 +22,9 @@
  marks=[
  td.skip_if_no("xlrd"),
  pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"),
+ pytest.mark.filterwarnings(
+ 'ignore:The Excel reader engine "xlrd" is deprecated,'
+ ),
  ],
  ),
  pytest.param(
@@ -34,8 +37,8 @@
  pytest.param(
  None,
  marks=[
- td.skip_if_no("xlrd"),
- pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"),
+ td.skip_if_no("openpyxl"),
+ pytest.mark.filterwarnings("ignore:.*html argument"),
  ],
  ),
  pytest.param("pyxlsb", marks=td.skip_if_no("pyxlsb")),
@@ -51,6 +54,8 @@ def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool:
  engine = engine.values[0]
  if engine == "openpyxl" and read_ext == ".xls":
  return False
+ if engine is None and read_ext == ".xls":
+ return False
  if engine == "odf" and read_ext != ".ods":
  return False
  if read_ext == ".ods" and engine != "odf":
@@ -559,7 +564,7 @@ def test_date_conversion_overflow(self, read_ext):
  columns=["DateColWithBigInt", "StringCol"],
  )
 
- if pd.read_excel.keywords["engine"] == "openpyxl":
+ if pd.read_excel.keywords["engine"] in ["openpyxl", None]:
  pytest.xfail("Maybe not supported by openpyxl")
 
  result = pd.read_excel("testdateoverflow" + read_ext)
@@ -942,7 +947,10 @@ def test_read_excel_squeeze(self, read_ext):
  expected = pd.Series([1, 2, 3], name="a")
  tm.assert_series_equal(actual, expected)
 
- def test_deprecated_kwargs(self, read_ext):
+ def test_deprecated_kwargs(self, engine, read_ext):
+ if engine == "xlrd":
+ pytest.skip("Use of xlrd engine produces a FutureWarning as well")
+
  with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False):
  pd.read_excel("test1" + read_ext, "Sheet1", 0)
 
@@ -961,6 +969,19 @@ def test_no_header_with_list_index_col(self, read_ext):
  )
  tm.assert_frame_equal(expected, result)
 
+ def test_excel_high_surrogate(self, engine, read_ext):
+ # GH 23809
+ if read_ext != ".xlsx":
+ pytest.skip("Test is only applicable to .xlsx file")
+ if engine in ["openpyxl", None]:
+ pytest.skip("Test does not work for openpyxl")
+
+ expected = pd.DataFrame(["\udc88"], columns=["Column1"])
+
+ # should not produce a segmentation violation
+ actual = pd.read_excel("high_surrogate.xlsx")
+ tm.assert_frame_equal(expected, actual)
+
 
 class TestExcelFileRead:
  @pytest.fixture(autouse=True)
@@ -1116,14 +1137,6 @@ def test_excel_read_binary(self, engine, read_ext):
  actual = pd.read_excel(data, engine=engine)
  tm.assert_frame_equal(expected, actual)
 
- def test_excel_high_surrogate(self, engine):
- # GH 23809
- expected = pd.DataFrame(["\udc88"], columns=["Column1"])
-
- # should not produce a segmentation violation
- actual = pd.read_excel("high_surrogate.xlsx")
- tm.assert_frame_equal(expected, actual)
-
  @pytest.mark.parametrize("filename", ["df_empty.xlsx", "df_equals.xlsx"])
  def test_header_with_index_col(self, engine, filename):
  # GH 33476
 
@@ -351,12 +351,16 @@ def test_excel_sheet_by_name_raise(self, path, engine):
  msg = "sheet 0 not found"
  with pytest.raises(ValueError, match=msg):
  pd.read_excel(xl, "0")
- else:
+ elif engine == "xlwt":
  import xlrd
 
  msg = "No sheet named <'0'>"
  with pytest.raises(xlrd.XLRDError, match=msg):
  pd.read_excel(xl, sheet_name="0")
+ else: # openpyxl
+ msg = "Worksheet 0 does not exist."
+ with pytest.raises(KeyError, match=msg):
+ pd.read_excel(xl, sheet_name="0")
 
  def test_excel_writer_context_manager(self, frame, path):
  with ExcelWriter(path) as writer:
@@ -1199,6 +1203,9 @@ def test_datetimes(self, path):
 
  tm.assert_series_equal(write_frame["A"], read_frame["A"])
 
+ @pytest.mark.filterwarnings(
+ 'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
+ )
  def test_bytes_io(self, engine):
  # see gh-7074
  bio = BytesIO()
@@ -1209,8 +1216,15 @@ def test_bytes_io(self, engine):
  df.to_excel(writer)
  writer.save()
 
+ if engine == "xlwt":
+ read_engine = "xlrd"
+ elif engine == "xlsxwriter":
+ read_engine = "openpyxl"
+ else:
+ read_engine = engine
+
  bio.seek(0)
- reread_df = pd.read_excel(bio, index_col=0)
+ reread_df = pd.read_excel(bio, index_col=0, engine=read_engine)
  tm.assert_frame_equal(df, reread_df)
 
  def test_write_lists_dict(self, path):
 
@@ -17,6 +17,9 @@ def skip_ods_and_xlsb_files(read_ext):
  pytest.skip("Not valid for xlrd")
 
 
+@pytest.mark.filterwarnings(
+ 'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
+)
 def test_read_xlrd_book(read_ext, frame):
  df = frame
 
@@ -36,8 +39,31 @@ def test_read_xlrd_book(read_ext, frame):
 
 
 # TODO: test for openpyxl as well
+@pytest.mark.filterwarnings(
+ 'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
+)
 def test_excel_table_sheet_by_index(datapath, read_ext):
  path = datapath("io", "data", "excel", f"test1{read_ext}")
- with pd.ExcelFile(path) as excel:
+ with pd.ExcelFile(path, engine="xlrd") as excel:
  with pytest.raises(xlrd.XLRDError):
  pd.read_excel(excel, sheet_name="asdf")
+
+
+def test_excel_file_warning_with_xlsx_file(datapath):
+ # GH 29375
+ path = datapath("io", "data", "excel", "test1.xlsx")
+ with tm.assert_produces_warning(
+ FutureWarning, check_stacklevel=True, raise_on_extra_warnings=False
+ ) as w:
+ pd.ExcelFile(path, engine="xlrd")
+ assert '"xlrd" is deprecated, use "openpyxl" instead.' in str(w[0].message)
+
+
+def test_read_excel_warning_with_xlsx_file(tmpdir, datapath):
+ # GH 29375
+ path = datapath("io", "data", "excel", "test1.xlsx")
+ with tm.assert_produces_warning(
+ FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
+ ) as w:
+ pd.read_excel(path, "Sheet1", engine="xlrd")
+ assert '"xlrd" is deprecated, use "openpyxl" instead.' in str(w[0].message)
Original file line number	Diff line number	Diff line change
@@ -143,6 +143,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
`143`	`143`	`Deprecations`
`144`	`144`	`~~~~~~~~~~~~`
`145`	`145`	- Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
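	`146`	+- The default engine of :func:`read_excel` is now ``openpyxl`` instead of ``xlrd``; ``xlrd`` is still selected automatically for paths ending in ``.xls``, and passing ``engine="xlrd"`` explicitly emits a ``FutureWarning`` because that engine is deprecated (:issue:`28547`)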
`146`	`147`	`-`
`147`	`148`	`-`
`148`	`149`