11import abc
22import datetime
3+ import inspect
34from io import BufferedIOBase , BytesIO , RawIOBase
45import os
56from textwrap import fill
67from typing import Any , Dict , Mapping , Union , cast
8+ import warnings
79
810from pandas ._config import config
911
1012from pandas ._libs .parsers import STR_NA_VALUES
1113from pandas ._typing import Buffer , FilePathOrBuffer , StorageOptions
14+ from pandas .compat ._optional import import_optional_dependency
1215from pandas .errors import EmptyDataError
1316from pandas .util ._decorators import Appender , deprecate_nonkeyword_arguments
1417
99102 of dtype conversion.
100103engine : str, default None
101104 If io is not a buffer or path, this must be set to identify io.
102- Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd" .
105+ Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".
103106 Engine compatibility :
107+
104108 - "xlrd" supports most old/new Excel file formats.
105109 - "openpyxl" supports newer Excel file formats.
106110 - "odf" supports OpenDocument file formats (.odf, .ods, .odt).
107111 - "pyxlsb" supports Binary Excel files.
112+
113+ .. versionchanged:: 1.2.0
114+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
115+ is no longer maintained, and is not supported with
116+ Python >= 3.9. When ``engine=None``, the following logic will be
117+ used to determine the engine.
118+
119+ - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
120+ then `odf <https://pypi.org/project/odfpy/>`_ will be used.
121+ - Otherwise if ``path_or_buffer`` is a bytes stream or an ``xlrd`` Book
122+ instance, or the file has the extension ``.xls``, then ``xlrd`` will
123+ be used.
124+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
125+ then ``openpyxl`` will be used.
126+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
127+
128+ Specifying ``engine="xlrd"`` will continue to be allowed for the
129+ indefinite future.
130+
108131converters : dict, default None
109132 Dict of functions for converting values in certain columns. Keys can
110133 either be integers or column labels, values are functions that take one
@@ -880,13 +903,32 @@ class ExcelFile:
880903 .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
881904 engine : str, default None
882905 If io is not a buffer or path, this must be set to identify io.
883- Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
884- default ``xlrd``.
906+ Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``.
885907 Engine compatibility :
908+
886909 - ``xlrd`` supports most old/new Excel file formats.
887910 - ``openpyxl`` supports newer Excel file formats.
888911 - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
889912 - ``pyxlsb`` supports Binary Excel files.
913+
914+ .. versionchanged:: 1.2.0
915+
916+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
917+ is no longer maintained, and is not supported with
918+ Python >= 3.9. When ``engine=None``, the following logic will be
919+ used to determine the engine.
920+
921+ - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
922+ then `odf <https://pypi.org/project/odfpy/>`_ will be used.
923+ - Otherwise if ``path_or_buffer`` is a bytes stream or an ``xlrd`` Book
924+ instance, or the file has the extension ``.xls``, then ``xlrd``
925+ will be used.
926+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
927+ then ``openpyxl`` will be used.
928+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
929+
930+ Specifying ``engine="xlrd"`` will continue to be allowed for the
931+ indefinite future.
890932 """
891933
892934 from pandas .io .excel ._odfreader import ODFReader
@@ -905,14 +947,59 @@ def __init__(
905947 self , path_or_buffer , engine = None , storage_options : StorageOptions = None
906948 ):
907949 if engine is None :
908- engine = "xlrd"
950+ # Determine ext and use odf for ods stream/file
909951 if isinstance (path_or_buffer , (BufferedIOBase , RawIOBase )):
952+ ext = None
910953 if _is_ods_stream (path_or_buffer ):
911954 engine = "odf"
912955 else :
913956 ext = os .path .splitext (str (path_or_buffer ))[- 1 ]
914957 if ext == ".ods" :
915958 engine = "odf"
959+
960+ if (
961+ import_optional_dependency (
962+ "xlrd" , raise_on_missing = False , on_version = "ignore"
963+ )
964+ is not None
965+ ):
966+ from xlrd import Book
967+
968+ if isinstance (path_or_buffer , Book ):
969+ engine = "xlrd"
970+
971+ # GH 35029 - Prefer openpyxl except for xls files
972+ if engine is None :
973+ if ext is None or isinstance (path_or_buffer , bytes ) or ext == ".xls" :
974+ engine = "xlrd"
975+ elif (
976+ import_optional_dependency (
977+ "openpyxl" , raise_on_missing = False , on_version = "ignore"
978+ )
979+ is not None
980+ ):
981+ engine = "openpyxl"
982+ else :
983+ caller = inspect .stack ()[1 ]
984+ if (
985+ caller .filename .endswith ("pandas/io/excel/_base.py" )
986+ and caller .function == "read_excel"
987+ ):
988+ stacklevel = 4
989+ else :
990+ stacklevel = 2
991+ warnings .warn (
992+ "The xlrd engine is no longer maintained and is not "
993+ "supported when using pandas with python >= 3.9. However, "
994+ "the engine xlrd will continue to be allowed for the "
995+ "indefinite future. Beginning with pandas 1.2.0, the "
996+ "openpyxl engine will be used if it is installed and the "
997+ "engine argument is not specified. Either install openpyxl "
998+ "or specify engine='xlrd' to silence this warning." ,
999+ FutureWarning ,
1000+ stacklevel = stacklevel ,
1001+ )
1002+ engine = "xlrd"
9161003 if engine not in self ._engines :
9171004 raise ValueError (f"Unknown engine: { engine } " )
9181005
0 commit comments