Skip to content

Commit 4665c11

Browse files
Completes OPEN-2955 Validate all dataframe column dtypes
1 parent 7a82188 commit 4665c11

File tree

1 file changed

+23 -5 lines changed

1 file changed

+23 -5 lines changed

openlayer/__init__.py

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -954,13 +954,31 @@ def add_dataset(
954954
zero_indexed_set = set(range(len(class_names)))
955955
if unique_labels != zero_indexed_set:
956956
raise exceptions.OpenlayerResourceError(
957-
context=f"There's an issue with values in the column `{label_column_name}` of the dataset. \n",
958-
message=f"The labels in `{label_column_name}` must be zero-indexed integer values. \n",
959-
mitigation="Make sure to upload a dataset with zero-indexed integer labels that match "
960-
f"the list in `class_names`. For example, the class `{class_names[0]}` should be represented as a 0 in the dataset, "
961-
f" the class `{class_names[1]}` should be a 1, and so on.",
957+
context=f"There's an issue with values in the column "
958+
f"`{label_column_name}` of the dataset. \n",
959+
message=f"The labels in `{label_column_name}` must be "
960+
"zero-indexed integer values. \n",
961+
mitigation="Make sure to upload a dataset with zero-indexed "
962+
"integer labels that match the list in `class_names`. "
963+
f"For example, the class `{class_names[0]}` should be "
964+
"represented as a 0 in the dataset, the class "
965+
f"`{class_names[1]}` should be a 1, and so on.",
962966
) from None
963967

968+
# Validating the column dtypes
969+
supported_dtypes = {"float32", "float64", "int32", "int64", "object"}
970+
error_msg = ""
971+
for col in df:
972+
dtype = df[col].dtype.name
973+
if dtype not in supported_dtypes:
974+
error_msg += f"- Column `{col}` is of dtype {dtype}. \n"
975+
if error_msg:
976+
raise exceptions.OpenlayerResourceError(
977+
context="There is an issue with some of the columns dtypes.\n",
978+
message=error_msg,
979+
mitigation=f"The supported dtypes are {supported_dtypes}. "
980+
"Make sure to cast the above columns to a supported dtype.",
981+
) from None
964982
# ------------------ Resource-schema consistency validations ----------------- #
965983
# Label column validations
966984
try:

0 commit comments

Comments
 (0)