googleapis
diff --git a/‎google/cloud/aiplatform/datasets/time_series_dataset.py‎
Lines changed: 1 addition & 1 deletion b/‎google/cloud/aiplatform/datasets/time_series_dataset.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎google/cloud/aiplatform/training_jobs.py‎
Lines changed: 91 additions & 7 deletions b/‎google/cloud/aiplatform/training_jobs.py‎
Lines changed: 91 additions & 7 deletions
@@ -46,7 +46,7 @@ def create(
  encryption_spec_key_name: Optional[str] = None,
  sync: bool = True,
  ) -> "TimeSeriesDataset":
- """Creates a new tabular dataset.
+ """Creates a new time series dataset.
 
  Args:
  display_name (str):
 
@@ -18,6 +18,7 @@
 import datetime
 import time
 from typing import Dict, List, Optional, Sequence, Tuple, Union
+import warnings
 
 import abc
 
@@ -2525,6 +2526,7 @@ def __init__(
  display_name: str,
  optimization_prediction_type: str,
  optimization_objective: Optional[str] = None,
+ column_specs: Optional[Dict[str, str]] = None,
  column_transformations: Optional[Union[Dict, List[Dict]]] = None,
  optimization_objective_recall_value: Optional[float] = None,
  optimization_objective_precision_value: Optional[float] = None,
@@ -2536,6 +2538,15 @@ def __init__(
  ):
  """Constructs a AutoML Tabular Training Job.
 
+ Example usage:
+
+ job = training_jobs.AutoMLTabularTrainingJob(
+ display_name="my_display_name",
+ optimization_prediction_type="classification",
+ optimization_objective="minimize-log-loss",
+ column_specs={"column_1": "auto", "column_2": "numeric"},
+ )
+
  Args:
  display_name (str):
  Required. The user-defined name of this TrainingPipeline.
@@ -2576,15 +2587,29 @@ def __init__(
  "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE).
  "minimize-mae" - Minimize mean-absolute error (MAE).
  "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE).
- column_transformations (Optional[Union[Dict, List[Dict]]]):
+ column_specs (Dict[str, str]):
+ Optional. Alternative to column_transformations where the keys of the dict
+ are column names and their respective values are one of
+ AutoMLTabularTrainingJob.column_data_types.
+ When creating a transformation for a BigQuery Struct column, the column
+ should be flattened using "." as the delimiter. Only columns with no children
+ should have a transformation.
+ If an input column has no transformations on it, such a column is
+ ignored by the training, except for the targetColumn, which should have
+ no transformations defined on it.
+ Only one of column_transformations or column_specs should be passed.
+ column_transformations (Union[Dict, List[Dict]]):
  Optional. Transformations to apply to the input columns (i.e. columns other
  than the targetColumn). Each transformation may produce multiple
  result values from the column's value, and all are used for training.
  When creating transformation for BigQuery Struct column, the column
- should be flattened using "." as the delimiter.
+ should be flattened using "." as the delimiter. Only columns with no child
+ should have a transformation.
  If an input column has no transformations on it, such a column is
  ignored by the training, except for the targetColumn, which should have
  no transformations defined on.
+ Only one of column_transformations or column_specs should be passed.
+ Consider using column_specs as column_transformations will be deprecated eventually.
  optimization_objective_recall_value (float):
  Optional. Required when maximize-precision-at-recall optimizationObjective was
  picked, represents the recall value at which the optimization is done.
@@ -2628,6 +2653,9 @@ def __init__(
  If set, the trained Model will be secured by this key.
 
  Overrides encryption_spec_key_name set in aiplatform.init.
+
+ Raises:
+ ValueError: When both column_transformations and column_specs were passed.
  """
  super().__init__(
  display_name=display_name,
@@ -2637,7 +2665,26 @@ def __init__(
  training_encryption_spec_key_name=training_encryption_spec_key_name,
  model_encryption_spec_key_name=model_encryption_spec_key_name,
  )
- self._column_transformations = column_transformations
+ # user populated transformations
+ if column_transformations is not None and column_specs is not None:
+ raise ValueError(
+ "Both column_transformations and column_specs were passed. Only one is allowed."
+ )
+ if column_transformations is not None:
+ self._column_transformations = column_transformations
+ warnings.simplefilter("always", DeprecationWarning)
+ warnings.warn(
+ "consider using column_specs instead. column_transformations will be deprecated in the future.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ elif column_specs is not None:
+ self._column_transformations = [
+ {transformation: {"column_name": column_name}}
+ for column_name, transformation in column_specs.items()
+ ]
+ else:
+ self._column_transformations = None
  self._optimization_objective = optimization_objective
  self._optimization_prediction_type = optimization_prediction_type
  self._optimization_objective_recall_value = optimization_objective_recall_value
@@ -2860,6 +2907,7 @@ def _run(
 
  training_task_definition = schema.training_job.definition.automl_tabular
 
+ # auto-populate transformations
  if self._column_transformations is None:
  _LOGGER.info(
  "No column transformations provided, so now retrieving columns from dataset in order to set default column transformations."
@@ -2870,21 +2918,19 @@ def _run(
  for column_name in dataset.column_names
  if column_name != target_column
  ]
- column_transformations = [
+ self._column_transformations = [
  {"auto": {"column_name": column_name}} for column_name in column_names
  ]
 
  _LOGGER.info(
  "The column transformation of type 'auto' was set for the following columns: %s."
  % column_names
  )
- else:
- column_transformations = self._column_transformations
 
  training_task_inputs_dict = {
  # required inputs
  "targetColumn": target_column,
- "transformations": column_transformations,
+ "transformations": self._column_transformations,
  "trainBudgetMilliNodeHours": budget_milli_node_hours,
  # optional inputs
  "weightColumnName": weight_column,
@@ -2935,6 +2981,44 @@ def _add_additional_experiments(self, additional_experiments: List[str]):
  """
  self._additional_experiments.extend(additional_experiments)
 
+ @staticmethod
+ def get_auto_column_specs(
+ dataset: datasets.TabularDataset, target_column: str,
+ ) -> Dict[str, str]:
+ """Returns a dict with all non-target columns as keys and 'auto' as values.
+
+ Example usage:
+
+ column_specs = training_jobs.AutoMLTabularTrainingJob.get_auto_column_specs(
+ dataset=my_dataset,
+ target_column="my_target_column",
+ )
+
+ Args:
+ dataset (datasets.TabularDataset):
+ Required. Intended dataset.
+ target_column (str):
+ Required. Intended target column.
+ Returns:
+ Dict[str, str]
+ Column names as keys and 'auto' as values
+ """
+ column_names = [
+ column for column in dataset.column_names if column != target_column
+ ]
+ column_specs = {column: "auto" for column in column_names}
+ return column_specs
+
+ class column_data_types:
+ AUTO = "auto"
+ NUMERIC = "numeric"
+ CATEGORICAL = "categorical"
+ TIMESTAMP = "timestamp"
+ TEXT = "text"
+ REPEATED_NUMERIC = "repeated_numeric"
+ REPEATED_CATEGORICAL = "repeated_categorical"
+ REPEATED_TEXT = "repeated_text"
+
 
 class AutoMLForecastingTrainingJob(_TrainingJob):
  _supported_training_schemas = (schema.training_job.definition.automl_forecasting,)