1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
@@ -653,6 +653,7 @@ I/O
- Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`)
- Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from a nullable integer type (:issue:`44079`)
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
- Bug in :func:`read_csv` where passing a parser via ``date_parser`` together with ``parse_dates=False`` still triggered date parsing (:issue:`44366`)

Period
^^^^^^
14 changes: 10 additions & 4 deletions pandas/io/parsers/readers.py
@@ -510,9 +510,15 @@ def _read(
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
):
"""Generic reader of line files."""
-    if kwds.get("date_parser", None) is not None:
-        if isinstance(kwds["parse_dates"], bool):
-            kwds["parse_dates"] = True
+    # if we pass a date_parser and parse_dates=False, we should not parse the
+    # dates GH#44366
+    if (
+        kwds.get("date_parser", None) is not None
+        and kwds.get("parse_dates", None) is None
+    ):
+        kwds["parse_dates"] = True
+    elif kwds.get("parse_dates", None) is None:
+        kwds["parse_dates"] = False

# Extract some of the arguments (pass chunksize on).
iterator = kwds.get("iterator", False)
@@ -585,7 +591,7 @@ def read_csv(
verbose=False,
skip_blank_lines=True,
# Datetime Handling
-    parse_dates=False,
+    parse_dates=None,
infer_datetime_format=False,
keep_date_col=False,
date_parser=None,
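As a reading aid (not part of the diff), here is a small standalone mirror of the branching the PR adds to _read above; resolve_parse_dates is a hypothetical name used only for this sketch:

def resolve_parse_dates(parse_dates, date_parser):
    # parse_dates now defaults to None so that an explicit parse_dates=False
    # can be told apart from "argument not passed at all"
    if date_parser is not None and parse_dates is None:
        # a parser was supplied and the user said nothing: keep the old
        # behaviour of switching parsing on
        return True
    if parse_dates is None:
        # neither a parser nor parse_dates was given: no date parsing
        return False
    # an explicit True/False/list from the caller always wins (GH#44366)
    return parse_dates

assert resolve_parse_dates(None, date_parser=str) is True
assert resolve_parse_dates(None, date_parser=None) is False
assert resolve_parse_dates(False, date_parser=str) is False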
37 changes: 37 additions & 0 deletions pandas/tests/io/parser/test_parse_dates.py
@@ -97,6 +97,43 @@ def __custom_date_parser(time):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
def test_read_csv_with_custom_date_parser_parse_dates_false(all_parsers):
# GH44366
def __custom_date_parser(time):
time = time.astype(np.float_)
time = time.astype(np.int_) # convert float seconds to int type
return pd.to_timedelta(time, unit="s")

testdata = StringIO(
"""time e n h
41047.00 -98573.7297 871458.0640 389.0089
Review comment (Member): Can we reduce the number of columns and use small ints? This is really hard to read.

Reply (Member, Author): You are right, this should be better now. I was just reusing the data from test_read_csv_with_custom_date_parser (which could also be simplified at a later stage).

41048.00 -98573.7299 871458.0640 389.0089
41049.00 -98573.7300 871458.0642 389.0088
41050.00 -98573.7299 871458.0643 389.0088
41051.00 -98573.7302 871458.0640 389.0086
"""
)
result = all_parsers.read_csv(
testdata,
delim_whitespace=True,
parse_dates=False,
date_parser=__custom_date_parser,
index_col="time",
)
time = Series([41047.00, 41048.00, 41049.00, 41050.00, 41051.00], name="time")
expected = DataFrame(
{
"e": [-98573.7297, -98573.7299, -98573.7300, -98573.7299, -98573.7302],
"n": [871458.0640, 871458.0640, 871458.0642, 871458.0643, 871458.0640],
"h": [389.0089, 389.0089, 389.0088, 389.0088, 389.0086],
},
index=time,
)

tm.assert_frame_equal(result, expected)

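For contrast with the test above, a hedged usage sketch (invented two-column data and a toy seconds parser, not taken from the PR) of both sides of the behaviour: an explicit parse_dates=False now wins over date_parser, while omitting parse_dates keeps the old parse-when-a-parser-is-given default for the index column.

from io import StringIO

import pandas as pd


def seconds_parser(values):
    # toy parser: interpret the raw strings as float seconds
    return pd.to_timedelta(values.astype(float), unit="s")


csv = "time value\n1 10\n2 20\n"

# explicit opt-out: the index stays numeric even though a parser is given
no_parse = pd.read_csv(
    StringIO(csv),
    delim_whitespace=True,
    parse_dates=False,
    date_parser=seconds_parser,
    index_col="time",
)
print(no_parse.index.dtype)  # int64

# parse_dates omitted: the supplied parser is still applied to the index
parsed = pd.read_csv(
    StringIO(csv),
    delim_whitespace=True,
    date_parser=seconds_parser,
    index_col="time",
)
print(parsed.index.dtype)  # timedelta64[ns]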

@xfail_pyarrow
def test_separator_date_conflict(all_parsers):
# Regression test for gh-4678