Skip to content
Prev Previous commit
Next Next commit
adding back validation df and fixing minor bugs
  • Loading branch information
gbayomi authored and gustavocidornelas committed Sep 28, 2022
commit d306d5e69435b304e46263c29d75e504682780b0
24 changes: 19 additions & 5 deletions openlayer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1213,12 +1213,13 @@ def add_baseline(
task_type: TaskType,
class_names: List[str],
label_column_name: str,
commit_message: str,
train_df: pd.DataFrame = None,
val_df: pd.DataFrame = None,
ensemble_size: int = 10,
random_seed: int = 0,
timeout: int = 60,
per_run_limit: int = None,
commit_message: Optional[str] = None,
project_id: str = None,
) -> Model:
"""Add a baseline model to the Unbox platform. You only need to specify a training set
Expand All @@ -1236,6 +1237,8 @@ def add_baseline(
E.g. `['positive', 'negative']`.
label_column_name : str
Column containing dataset labels
commit_message : str
Commit message for the model version.
train_df : pd.DataFrame, default None
Training set dataframe.
ensemble_size : int, default 10
Expand All @@ -1246,8 +1249,6 @@ def add_baseline(
Maximum time to train all the models.
per_run_limit : int, default None
Maximum time to train each model.
commit_message : str, default None
Commit message for the model version.

Returns
-------
Expand Down Expand Up @@ -1331,7 +1332,7 @@ def add_baseline(
categorical_feature_names = qb.get_categorical_feature_names(train_features_df)

# Train model
print(f"Training model for approximately {round(0.0166 * timeout, 2)} minutes")
print(f"Training model for approximately {round(0.0166 * timeout, 2)} minute(s).")
model = qb.train_auto_classifiers(
timeout=timeout,
per_run_limit=per_run_limit,
Expand All @@ -1347,6 +1348,19 @@ def add_baseline(
f.write("Automunge==8.30\n")
f.write("scikit-learn== 0.24.1")

if val_df is not None:
self.add_dataframe(
df=val_df,
task_type=task_type,
project_id=project_id,
class_names=class_names,
label_column_name=label_column_name,
commit_message=commit_message,
feature_names=col_names,
categorical_feature_names=categorical_feature_names,
)


# Upload model
model_info = self.add_model(
function=predict_proba,
Expand All @@ -1358,7 +1372,7 @@ def add_baseline(
name=f"Baseline model",
commit_message=commit_message,
feature_names=col_names,
train_sample_df=pd.sample(train_df, n=3000, random_state=random_seed),
train_sample_df=train_df.sample(n=3000, random_state=random_seed),
train_sample_label_column_name=label_column_name,
categorical_feature_names=categorical_feature_names,
requirements_txt_file="auto-requirements.txt",
Expand Down