79 changes: 39 additions & 40 deletions pandas/tests/io/pytables/test_store.py
@@ -41,22 +41,22 @@
 tables = pytest.importorskip("tables")
 
 
-def test_context(setup_path):
-    with tm.ensure_clean(setup_path) as path:
-        try:
-            with HDFStore(path) as tbl:
-                raise ValueError("blah")
-        except ValueError:
-            pass
-    with tm.ensure_clean(setup_path) as path:
+def test_context(setup_path, tmp_path):
+    path = tmp_path / setup_path
+    try:
         with HDFStore(path) as tbl:
-            tbl["a"] = DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=Index(list("ABCD"), dtype=object),
-                index=Index([f"i-{i}" for i in range(30)], dtype=object),
-            )
-            assert len(tbl) == 1
-            assert type(tbl["a"]) == DataFrame
+            raise ValueError("blah")
+    except ValueError:
+        pass
+    path = tmp_path / setup_path
+    with HDFStore(path) as tbl:
+        tbl["a"] = DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=Index(list("ABCD"), dtype=object),
+            index=Index([f"i-{i}" for i in range(30)], dtype=object),
+        )
+        assert len(tbl) == 1
+        assert type(tbl["a"]) == DataFrame
 
 
 def test_no_track_times(tmp_path, setup_path):
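
For readers skimming the diff: tm.ensure_clean is a pandas test helper, a context manager that yields a temporary file path and deletes the file on exit, whereas pytest's built-in tmp_path fixture gives each test its own pathlib.Path directory that pytest cleans up across runs. A minimal side-by-side sketch of the two styles (standalone illustration, not part of the diff; assumes pandas and PyTables are installed):

    import numpy as np
    import pandas as pd
    import pandas._testing as tm
    from pandas import HDFStore

    def test_old_style():
        # helper-managed temp file, removed when the block exits
        with tm.ensure_clean("store.h5") as path:
            with HDFStore(path) as store:
                store["a"] = pd.DataFrame(np.arange(12.0).reshape(3, 4))

    def test_new_style(tmp_path):
        # per-test directory managed by pytest; no manual cleanup needed
        path = tmp_path / "store.h5"
        with HDFStore(path) as store:
            store["a"] = pd.DataFrame(np.arange(12.0).reshape(3, 4))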
@@ -971,37 +971,36 @@ def test_pickle_path_localpath():
 
 
 @pytest.mark.parametrize("propindexes", [True, False])
-def test_copy(propindexes):
+def test_copy(propindexes, temp_file):
     df = DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=Index(list("ABCD")),
         index=Index([f"i-{i}" for i in range(30)]),
     )
 
-    with tm.ensure_clean() as path:
-        with HDFStore(path) as st:
-            st.append("df", df, data_columns=["A"])
-        with tempfile.NamedTemporaryFile() as new_f:
-            with HDFStore(path) as store:
-                with contextlib.closing(
-                    store.copy(new_f.name, keys=None, propindexes=propindexes)
-                ) as tstore:
-                    # check keys
-                    keys = store.keys()
-                    assert set(keys) == set(tstore.keys())
-                    # check indices & nrows
-                    for k in tstore.keys():
-                        if tstore.get_storer(k).is_table:
-                            new_t = tstore.get_storer(k)
-                            orig_t = store.get_storer(k)
-
-                            assert orig_t.nrows == new_t.nrows
-
-                            # check propindixes
-                            if propindexes:
-                                for a in orig_t.axes:
-                                    if a.is_indexed:
-                                        assert new_t[a.name].is_indexed
+    with HDFStore(temp_file) as st:
+        st.append("df", df, data_columns=["A"])
+    with tempfile.NamedTemporaryFile() as new_f:
+        with HDFStore(temp_file) as store:
+            with contextlib.closing(
+                store.copy(new_f.name, keys=None, propindexes=propindexes)
+            ) as tstore:
+                # check keys
+                keys = store.keys()
+                assert set(keys) == set(tstore.keys())
+                # check indices & nrows
+                for k in tstore.keys():
+                    if tstore.get_storer(k).is_table:
+                        new_t = tstore.get_storer(k)
+                        orig_t = store.get_storer(k)
+
+                        assert orig_t.nrows == new_t.nrows
+
+                        # check propindixes
+                        if propindexes:
+                            for a in orig_t.axes:
+                                if a.is_indexed:
+                                    assert new_t[a.name].is_indexed
 
 
 def test_duplicate_column_name(tmp_path, setup_path):
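
Several tests in both files now take a temp_file fixture that this diff does not define. Assuming it lives in a conftest.py and builds on tmp_path, a plausible definition looks like the sketch below (hypothetical; the actual pandas fixture may differ):

    import uuid
    import pytest

    @pytest.fixture
    def temp_file(tmp_path):
        # hypothetical: a unique, already-created file inside pytest's
        # per-test temporary directory
        file_path = tmp_path / str(uuid.uuid4())
        file_path.touch()
        return file_path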
190 changes: 90 additions & 100 deletions pandas/tests/io/test_common.py
@@ -86,12 +86,11 @@ def test_stringify_path_fspath(self):
         result = icom.stringify_path(p)
         assert result == "foo/bar.csv"
 
-    def test_stringify_file_and_path_like(self):
+    def test_stringify_file_and_path_like(self, temp_file):
         # GH 38125: do not stringify file objects that are also path-like
         fsspec = pytest.importorskip("fsspec")
-        with tm.ensure_clean() as path:
-            with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
-                assert fsspec_obj == icom.stringify_path(fsspec_obj)
+        with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj:
+            assert fsspec_obj == icom.stringify_path(fsspec_obj)
 
     @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
     def test_infer_compression_from_path(self, compression_format, path_type):
@@ -338,49 +337,47 @@ def test_read_fspath_all(self, reader, module, path, datapath):
             ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
         ],
     )
-    def test_write_fspath_all(self, writer_name, writer_kwargs, module):
+    def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path):
         if writer_name in ["to_latex"]:  # uses Styler implementation
             pytest.importorskip("jinja2")
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        string = str(tmp_path / "string")
+        fspath = str(tmp_path / "fspath")
         df = pd.DataFrame({"A": [1, 2]})
 
-        with p1 as string, p2 as fspath:
-            pytest.importorskip(module)
-            mypath = CustomFSPath(fspath)
-            writer = getattr(df, writer_name)
-
-            writer(string, **writer_kwargs)
-            writer(mypath, **writer_kwargs)
-            with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
-                if writer_name == "to_excel":
-                    # binary representation of excel contains time creation
-                    # data that causes flaky CI failures
-                    result = pd.read_excel(f_str, **writer_kwargs)
-                    expected = pd.read_excel(f_path, **writer_kwargs)
-                    tm.assert_frame_equal(result, expected)
-                else:
-                    result = f_str.read()
-                    expected = f_path.read()
-                    assert result == expected
-
-    def test_write_fspath_hdf5(self):
+        pytest.importorskip(module)
+        mypath = CustomFSPath(fspath)
+        writer = getattr(df, writer_name)
+
+        writer(string, **writer_kwargs)
+        writer(mypath, **writer_kwargs)
+        with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
+            if writer_name == "to_excel":
+                # binary representation of excel contains time creation
+                # data that causes flaky CI failures
+                result = pd.read_excel(f_str, **writer_kwargs)
+                expected = pd.read_excel(f_path, **writer_kwargs)
+                tm.assert_frame_equal(result, expected)
+            else:
+                result = f_str.read()
+                expected = f_path.read()
+                assert result == expected
+
+    def test_write_fspath_hdf5(self, tmp_path):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
         # have to read and compare equality
         pytest.importorskip("tables")
 
         df = pd.DataFrame({"A": [1, 2]})
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        string = str(tmp_path / "string")
+        fspath = str(tmp_path / "fspath")
 
-        with p1 as string, p2 as fspath:
-            mypath = CustomFSPath(fspath)
-            df.to_hdf(mypath, key="bar")
-            df.to_hdf(string, key="bar")
+        mypath = CustomFSPath(fspath)
+        df.to_hdf(mypath, key="bar")
+        df.to_hdf(string, key="bar")
 
-            result = pd.read_hdf(fspath, key="bar")
-            expected = pd.read_hdf(string, key="bar")
+        result = pd.read_hdf(fspath, key="bar")
+        expected = pd.read_hdf(string, key="bar")
 
         tm.assert_frame_equal(result, expected)
 
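CustomFSPath in the tests above exercises the os.PathLike protocol: os.fspath() — and therefore open(), to_hdf(), and friends — accepts any object implementing __fspath__. A minimal sketch of such a class (pandas' actual test helper may differ in detail):

    import os

    class CustomFSPath:
        # minimal os.PathLike implementation
        def __init__(self, path):
            self.path = path

        def __fspath__(self):
            return self.path

    # os.fspath() unwraps the object, so path-accepting APIs take it directly
    assert os.fspath(CustomFSPath("data.csv")) == "data.csv"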
@@ -432,35 +429,33 @@ def test_next(self, mmap_file):
         with pytest.raises(StopIteration, match=r"^$"):
             next(wrapper)
 
-    def test_unknown_engine(self):
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path)
-            with pytest.raises(ValueError, match="Unknown engine"):
-                pd.read_csv(path, engine="pyt")
-
-    def test_binary_mode(self):
+    def test_unknown_engine(self, temp_file):
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(temp_file)
+        with pytest.raises(ValueError, match="Unknown engine"):
+            pd.read_csv(temp_file, engine="pyt")
+
+    def test_binary_mode(self, temp_file):
         """
         'encoding' shouldn't be passed to 'open' in binary mode.
 
         GH 35058
         """
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path, mode="w+b")
-            tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(temp_file, mode="w+b")
+        tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0))
 
     @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"])
     @pytest.mark.parametrize("compression_", ["bz2", "xz"])
-    def test_warning_missing_utf_bom(self, encoding, compression_):
+    def test_warning_missing_utf_bom(self, encoding, compression_, temp_file):
         """
         bz2 and xz do not write the byte order mark (BOM) for utf-16/32.
 
@@ -473,17 +468,16 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
             columns=pd.Index(list("ABCD")),
             index=pd.Index([f"i-{i}" for i in range(30)]),
         )
-        with tm.ensure_clean() as path:
-            with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
-                df.to_csv(path, compression=compression_, encoding=encoding)
+        with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
+            df.to_csv(temp_file, compression=compression_, encoding=encoding)
 
-            # reading should fail (otherwise we wouldn't need the warning)
-            msg = (
-                r"UTF-\d+ stream does not start with BOM|"
-                r"'utf-\d+' codec can't decode byte"
-            )
-            with pytest.raises(UnicodeError, match=msg):
-                pd.read_csv(path, compression=compression_, encoding=encoding)
+        # reading should fail (otherwise we wouldn't need the warning)
+        msg = (
+            r"UTF-\d+ stream does not start with BOM|"
+            r"'utf-\d+' codec can't decode byte"
+        )
+        with pytest.raises(UnicodeError, match=msg):
+            pd.read_csv(temp_file, compression=compression_, encoding=encoding)
 
 
 def test_is_fsspec_url():
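
The msg regex in the hunk above matches behaviour that comes straight from the stdlib codec: a whole-string utf-16 encode prepends a BOM, and the incremental decoder (the path read_csv exercises) refuses a stream that lacks one. A quick stdlib-only check (illustrative, not taken from the PR):

    import io

    data = "A,B\n1,2\n".encode("utf-16")
    assert data[:2] in (b"\xff\xfe", b"\xfe\xff")  # BOM, little- or big-endian

    reader = io.TextIOWrapper(io.BytesIO(data[2:]), encoding="utf-16")
    try:
        reader.read()  # incremental utf-16 decode with the BOM stripped
    except UnicodeError as exc:
        assert "does not start with BOM" in str(exc)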
@@ -514,38 +508,36 @@ def test_is_fsspec_url_chained():
 
 
 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_codecs_encoding(format):
+def test_codecs_encoding(format, temp_file):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=pd.Index(list("ABCD")),
         index=pd.Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with open(path, mode="w", encoding="utf-8") as handle:
-            getattr(expected, f"to_{format}")(handle)
-        with open(path, encoding="utf-8") as handle:
-            if format == "csv":
-                df = pd.read_csv(handle, index_col=0)
-            else:
-                df = pd.read_json(handle)
+    with open(temp_file, mode="w", encoding="utf-8") as handle:
+        getattr(expected, f"to_{format}")(handle)
+    with open(temp_file, encoding="utf-8") as handle:
+        if format == "csv":
+            df = pd.read_csv(handle, index_col=0)
+        else:
+            df = pd.read_json(handle)
     tm.assert_frame_equal(expected, df)
 
 
-def test_codecs_get_writer_reader():
+def test_codecs_get_writer_reader(temp_file):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=pd.Index(list("ABCD")),
         index=pd.Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with open(path, "wb") as handle:
-            with codecs.getwriter("utf-8")(handle) as encoded:
-                expected.to_csv(encoded)
-        with open(path, "rb") as handle:
-            with codecs.getreader("utf-8")(handle) as encoded:
-                df = pd.read_csv(encoded, index_col=0)
+    with open(temp_file, "wb") as handle:
+        with codecs.getwriter("utf-8")(handle) as encoded:
+            expected.to_csv(encoded)
+    with open(temp_file, "rb") as handle:
+        with codecs.getreader("utf-8")(handle) as encoded:
+            df = pd.read_csv(encoded, index_col=0)
     tm.assert_frame_equal(expected, df)
 
 
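codecs.getwriter and codecs.getreader, used in test_codecs_get_writer_reader above, return stream classes that wrap a binary handle with on-the-fly encoding and decoding, which is why the wrapped objects can be handed straight to to_csv and read_csv. The same round trip with only the stdlib:

    import codecs
    import io

    buf = io.BytesIO()
    writer = codecs.getwriter("utf-8")(buf)  # wraps a bytes stream, accepts str
    writer.write("A,B\n1,2\n")
    raw = buf.getvalue()
    assert raw == b"A,B\n1,2\n"

    reader = codecs.getreader("utf-8")(io.BytesIO(raw))  # yields str from bytes
    assert reader.read() == "A,B\n1,2\n"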
@@ -572,7 +564,7 @@ def test_explicit_encoding(io_class, mode, msg):
 
 @pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_encoding_errors(encoding_errors, format):
+def test_encoding_errors(encoding_errors, format, temp_file):
     # GH39450
     msg = "'utf-8' codec can't decode byte"
     bad_encoding = b"\xe4"
@@ -591,18 +583,17 @@ def test_encoding_errors(encoding_errors, format):
         + b'"}}'
     )
     reader = partial(pd.read_json, orient="index")
-    with tm.ensure_clean() as path:
-        file = Path(path)
-        file.write_bytes(content)
+    file = Path(temp_file)
+    file.write_bytes(content)
 
-        if encoding_errors != "replace":
-            with pytest.raises(UnicodeDecodeError, match=msg):
-                reader(path, encoding_errors=encoding_errors)
-        else:
-            df = reader(path, encoding_errors=encoding_errors)
-            decoded = bad_encoding.decode(errors=encoding_errors)
-            expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
-            tm.assert_frame_equal(df, expected)
+    if encoding_errors != "replace":
+        with pytest.raises(UnicodeDecodeError, match=msg):
+            reader(temp_file, encoding_errors=encoding_errors)
+    else:
+        df = reader(temp_file, encoding_errors=encoding_errors)
+        decoded = bad_encoding.decode(errors=encoding_errors)
+        expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
+        tm.assert_frame_equal(df, expected)
 
 
 @pytest.mark.parametrize("encoding_errors", [0, None])
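
encoding_errors is forwarded to the codec error handler, the same errors argument bytes.decode takes: 'replace' substitutes U+FFFD where 'strict' (the default) raises. Both branches of test_encoding_errors mirror this stdlib behaviour, as a quick illustration shows:

    bad = b"\xe4"  # not valid as a standalone UTF-8 byte
    assert bad.decode("utf-8", errors="replace") == "\ufffd"

    try:
        bad.decode("utf-8")  # default errors="strict"
    except UnicodeDecodeError as exc:
        assert "codec can't decode byte" in str(exc)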
@@ -616,11 +607,10 @@ def test_encoding_errors_badtype(encoding_errors):
         reader(content)
 
 
-def test_bad_encdoing_errors():
+def test_bad_encdoing_errors(temp_file):
     # GH 39777
-    with tm.ensure_clean() as path:
-        with pytest.raises(LookupError, match="unknown error handler name"):
-            icom.get_handle(path, "w", errors="bad")
+    with pytest.raises(LookupError, match="unknown error handler name"):
+        icom.get_handle(temp_file, "w", errors="bad")
 
 
 @pytest.mark.skipif(WASM, reason="limited file system access on WASM")
@@ -653,7 +643,7 @@ def close(self):
 @pytest.mark.parametrize("compression", [None, "infer"])
 def test_read_csv_chained_url_no_error(compression):
     # GH 60100
-    tar_file_path = "pandas/tests/io/data/tar/test-csv.tar"
+    tar_file_path = "data/tar/test-csv.tar"
     chained_file_url = f"tar://test.csv::file://{tar_file_path}"
 
     result = pd.read_csv(chained_file_url, compression=compression, sep=";")