@@ -71,6 +71,41 @@ def test_read_empty_dta(self, version):
7171 empty_ds2 = read_stata (path )
7272 tm .assert_frame_equal (empty_ds , empty_ds2 )
7373
74+ @pytest .mark .parametrize ("version" , [114 , 117 , 118 , 119 , None ])
75+ def test_read_empty_dta_with_dtypes (self , version ):
76+ # GH 46240
77+ # Fixing above bug revealed that types are not correctly preserved when
78+ # writing empty DataFrames
79+ empty_df_typed = DataFrame (
80+ {
81+ "i8" : np .array ([0 ], dtype = np .int8 ),
82+ "i16" : np .array ([0 ], dtype = np .int16 ),
83+ "i32" : np .array ([0 ], dtype = np .int32 ),
84+ "i64" : np .array ([0 ], dtype = np .int64 ),
85+ "u8" : np .array ([0 ], dtype = np .uint8 ),
86+ "u16" : np .array ([0 ], dtype = np .uint16 ),
87+ "u32" : np .array ([0 ], dtype = np .uint32 ),
88+ "u64" : np .array ([0 ], dtype = np .uint64 ),
89+ "f32" : np .array ([0 ], dtype = np .float32 ),
90+ "f64" : np .array ([0 ], dtype = np .float64 ),
91+ }
92+ )
93+ expected = empty_df_typed .copy ()
94+ # No uint# support. Downcast since values in range for int#
95+ expected ["u8" ] = expected ["u8" ].astype (np .int8 )
96+ expected ["u16" ] = expected ["u16" ].astype (np .int16 )
97+ expected ["u32" ] = expected ["u32" ].astype (np .int32 )
98+ # No int64 supported at all. Downcast since values in range for int32
99+ expected ["u64" ] = expected ["u64" ].astype (np .int32 )
100+ expected ["i64" ] = expected ["i64" ].astype (np .int32 )
101+
102+ # GH 7369, make sure can read a 0-obs dta file
103+ with tm .ensure_clean () as path :
104+ empty_df_typed .to_stata (path , write_index = False , version = version )
105+ empty_reread = read_stata (path )
106+ tm .assert_frame_equal (expected , empty_reread )
107+ tm .assert_series_equal (expected .dtypes , empty_reread .dtypes )
108+
74109 @pytest .mark .parametrize ("version" , [114 , 117 , 118 , 119 , None ])
75110 def test_read_index_col_none (self , version ):
76111 df = DataFrame ({"a" : range (5 ), "b" : ["b1" , "b2" , "b3" , "b4" , "b5" ]})
@@ -2274,3 +2309,21 @@ def test_nullable_support(dtype, version):
22742309 tm .assert_series_equal (df .a , reread .a )
22752310 tm .assert_series_equal (reread .b , expected_b )
22762311 tm .assert_series_equal (reread .c , expected_c )
2312+
2313+
2314+ def test_empty_frame ():
2315+ # GH 46240
2316+ # create an empty DataFrame with int64 and float64 dtypes
2317+ df = DataFrame (data = {"a" : range (3 ), "b" : [1.0 , 2.0 , 3.0 ]}).head (0 )
2318+ with tm .ensure_clean () as path :
2319+ df .to_stata (path , write_index = False , version = 117 )
2320+ # Read entire dataframe
2321+ df2 = read_stata (path )
2322+ assert "b" in df2
2323+ # Dtypes don't match since no support for int32
2324+ dtypes = Series ({"a" : np .dtype ("int32" ), "b" : np .dtype ("float64" )})
2325+ tm .assert_series_equal (df2 .dtypes , dtypes )
2326+ # read one column of empty .dta file
2327+ df3 = read_stata (path , columns = ["a" ])
2328+ assert "b" not in df3
2329+ tm .assert_series_equal (df3 .dtypes , dtypes .loc [["a" ]])
0 commit comments