aws · jaidisido · Mar 19, 2024 · Mar 18, 2024 · Mar 19, 2024
diff --git a/awswrangler/athena/_write_iceberg.py b/awswrangler/athena/_write_iceberg.py
@@ -108,6 +108,9 @@ def _determine_differences(
  )
  frame_columns_types.update(frame_partitions_types)
 
+ # lowercase DataFrame columns, as all the column names from Athena will be lowercased
+ frame_columns_types = {k.lower(): v for k, v in frame_columns_types.items()}
+
  catalog_column_types = typing.cast(
  Dict[str, str],
  catalog.get_table_types(database=database, table=table, catalog_id=catalog_id, boto3_session=boto3_session),

diff --git a/tests/unit/test_athena_iceberg.py b/tests/unit/test_athena_iceberg.py
@@ -870,3 +870,57 @@ def test_athena_iceberg_use_partition_function(
 
  assert len(df_out) == len(df) + len(df2)
  assert len(df_out.columns) == len(df.columns)
+
+
+def test_to_iceberg_uppercase_columns(
+ path: str,
+ path2: str,
+ path3: str,
+ glue_database: str,
+ glue_table: str,
+) -> None:
+ df = pd.DataFrame(
+ {
+ "ID": [1, 2, 3, 4, 5],
+ "TS": [
+ ts("2020-01-01 00:00:00.0"),
+ ts("2020-01-02 00:00:01.0"),
+ ts("2020-01-03 00:00:00.0"),
+ ts("2020-01-03 12:30:00.0"),
+ ts("2020-01-03 16:45:00.0"),
+ ],
+ }
+ )
+ df["ID"] = df["ID"].astype("Int64") # Cast as nullable int64 type
+
+ split_index = 4
+
+ wr.athena.to_iceberg(
+ df=df.iloc[:split_index],
+ database=glue_database,
+ table=glue_table,
+ table_location=path,
+ temp_path=path2,
+ keep_files=False,
+ )
+
+ wr.athena.to_iceberg(
+ df=df.iloc[split_index:],
+ database=glue_database,
+ table=glue_table,
+ table_location=path,
+ temp_path=path2,
+ s3_output=path3,
+ keep_files=False,
+ mode="append",
+ schema_evolution=True,
+ )
+
+ df_output = wr.athena.read_sql_query(
+ sql=f'SELECT ID, TS FROM "{glue_table}" ORDER BY ID',
+ database=glue_database,
+ ctas_approach=False,
+ unload_approach=False,
+ )
+
+ assert_pandas_equals(df, df_output)