2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
@@ -29,7 +29,7 @@ jobs:
     - name: Run tests
       run: |
         if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml'; fi
-        python -m pytest --durations=20 --timeout=300 --timeout-method=thread -v $codecov test
+        python -m pytest --durations=20 --timeout=600 --timeout-method=signal -v $codecov test
     - name: Check for files left behind by test
       if: ${{ always() }}
       run: |
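
For context: pytest-timeout's thread method tears down the whole pytest process when a test times out, whereas signal uses SIGALRM to fail only the offending test and lets the rest of the suite continue (POSIX, main thread only). The same setting can also be applied per test; a minimal sketch, assuming pytest-timeout is installed:

import time

import pytest


@pytest.mark.timeout(600, method="signal")  # SIGALRM-based: interrupts just this test
def test_long_running() -> None:
    time.sleep(1)  # would be reported as a timeout failure if it exceeded 600s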
208 changes: 104 additions & 104 deletions autoPyTorch/ensemble/ensemble_builder.py

Large diffs are not rendered by default.

51 changes: 19 additions & 32 deletions autoPyTorch/ensemble/ensemble_selection.py
@@ -6,7 +6,7 @@
 from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
-from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score
+from autoPyTorch.pipeline.components.training.metrics.utils import calculate_loss


 class EnsembleSelection(AbstractEnsemble):
@@ -71,60 +71,47 @@ def _fit(
             dtype=np.float64,
         )
         for i in range(ensemble_size):
-            scores = np.zeros(
+            losses = np.zeros(
                 (len(predictions)),
                 dtype=np.float64,
             )
             s = len(ensemble)
-            if s == 0:
-                weighted_ensemble_prediction.fill(0.0)
-            else:
-                weighted_ensemble_prediction.fill(0.0)
-                for pred in ensemble:
-                    np.add(
-                        weighted_ensemble_prediction,
-                        pred,
-                        out=weighted_ensemble_prediction,
-                    )
-                np.multiply(
-                    weighted_ensemble_prediction,
-                    1 / s,
-                    out=weighted_ensemble_prediction,
-                )
-                np.multiply(
-                    weighted_ensemble_prediction,
-                    (s / float(s + 1)),
-                    out=weighted_ensemble_prediction,
-                )
+            if s > 0:
+                np.add(
+                    weighted_ensemble_prediction,
+                    ensemble[-1],
+                    out=weighted_ensemble_prediction,
+                )

+            # Memory-efficient averaging!
             for j, pred in enumerate(predictions):
-                # Memory-efficient averaging!
-                fant_ensemble_prediction.fill(0.0)
+                # fant_ensemble_prediction is the prediction of the current ensemble
+                # and should be (predictions[selected_prev_iterations] + predictions[j]) / (s + 1)
+                # We overwrite the contents of fant_ensemble_prediction
+                # directly with weighted_ensemble_prediction + new_prediction and then scale for avg
                 np.add(
-                    fant_ensemble_prediction,
                     weighted_ensemble_prediction,
+                    pred,
                     out=fant_ensemble_prediction
                 )
-                np.add(
+                np.multiply(
                     fant_ensemble_prediction,
-                    (1. / float(s + 1)) * pred,
+                    (1. / float(s + 1)),
                     out=fant_ensemble_prediction
                 )

-                # Calculate score is versatile and can return a dict of score
-                # when all_scoring_functions=False, we know it will be a float
-                score = calculate_score(
+                # calculate_loss is versatile and can return a dict of losses
+                losses[j] = calculate_loss(
                     metrics=[self.metric],
                     target=labels,
                     prediction=fant_ensemble_prediction,
                     task_type=self.task_type,
-                )
-                scores[j] = self.metric._optimum - score[self.metric.name]
+                )[self.metric.name]

-            all_best = np.argwhere(scores == np.nanmin(scores)).flatten()
+            all_best = np.argwhere(losses == np.nanmin(losses)).flatten()
             best = self.random_state.choice(all_best)
             ensemble.append(predictions[best])
-            trajectory.append(scores[best])
+            trajectory.append(losses[best])
             order.append(best)

         # Handle special case
@@ -133,7 +120,7 @@ def _fit(

         self.indices_ = order
         self.trajectory_ = trajectory
-        self.train_score_ = trajectory[-1]
+        self.train_loss_ = trajectory[-1]

     def _calculate_weights(self) -> None:
         ensemble_members = Counter(self.indices_).most_common()
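
The rewritten _fit loop keeps weighted_ensemble_prediction as a running sum of the s selected predictions instead of rebuilding their average on every iteration, so each candidate ensemble is simply (sum + pred) / (s + 1). A minimal numpy sketch with hypothetical shapes showing the two forms agree:

import numpy as np

rng = np.random.default_rng(0)
ensemble = [rng.random((5, 3)) for _ in range(4)]  # s = 4 previously selected predictions
pred = rng.random((5, 3))                          # candidate prediction j

running_sum = np.sum(ensemble, axis=0)             # maintained incrementally in _fit
fant = (running_sum + pred) / (len(ensemble) + 1)  # memory-efficient candidate average

naive = np.mean(ensemble + [pred], axis=0)         # recompute-from-scratch reference
assert np.allclose(fant, naive)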
21 changes: 6 additions & 15 deletions autoPyTorch/evaluation/abstract_evaluator.py
@@ -38,7 +38,7 @@
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
 from autoPyTorch.pipeline.components.training.metrics.utils import (
-    calculate_score,
+    calculate_loss,
     get_metrics,
 )
 from autoPyTorch.utils.backend import Backend
@@ -364,30 +364,21 @@ def _get_pipeline(self) -> BaseEstimator:
     def _loss(self, y_true: np.ndarray, y_hat: np.ndarray) -> Dict[str, float]:
         """SMAC follows a minimization goal, so the make_scorer
         sign is used as a guide to obtain the value to reduce.
-
-        On this regard, to optimize a metric:
-        1- score is calculared with calculate_score, with the caveat, that if
-        for the metric greater is not better, a negative score is returned.
-        2- the err (the optimization goal) is then:
-            optimum - (metric.sign * actual_score)
-            For accuracy for example: optimum(1) - (+1 * actual score)
-            For logloss for example: optimum(0) - (-1 * actual score)
+        calculate_loss internally translates a score function into
+        a minimization problem.
         """

         if not isinstance(self.configuration, Configuration):
-            return {self.metric.name: 1.0}
+            return {self.metric.name: self.metric._worst_possible_result}

         if self.additional_metrics is not None:
             metrics = self.additional_metrics
         else:
             metrics = [self.metric]
-        score = calculate_score(
-            y_true, y_hat, self.task_type, metrics)
-
-        err = {metric.name: metric._optimum - score[metric.name] for metric in metrics
-               if metric.name in score.keys()}
-
-        return err
+        return calculate_loss(
+            y_true, y_hat, self.task_type, metrics)

     def finish_up(self, loss: Dict[str, float], train_loss: Dict[str, float],
                   opt_pred: np.ndarray, valid_pred: Optional[np.ndarray],
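
The translation _loss now delegates to calculate_loss follows the rule loss = optimum - sign * score, where score is the dict value returned by calculate_score, so lower is always better for SMAC. A small numeric sketch with made-up scores (the _optimum and _sign attribute names mirror autoPyTorchMetric):

def to_loss(optimum: float, sign: float, score: float) -> float:
    # loss = optimum - sign * score, as computed by calculate_loss
    return optimum - sign * score

print(to_loss(optimum=1.0, sign=1.0, score=0.87))   # accuracy 0.87 -> loss 0.13
print(to_loss(optimum=0.0, sign=-1.0, score=0.35))  # log loss 0.35 -> loss 0.35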
58 changes: 52 additions & 6 deletions autoPyTorch/pipeline/components/training/metrics/utils.py
@@ -104,17 +104,17 @@ def get_metrics(dataset_properties: Dict[str, Any],


 def calculate_score(
-        target: np.ndarray,
-        prediction: np.ndarray,
-        task_type: int,
-        metrics: Iterable[autoPyTorchMetric],
+    target: np.ndarray,
+    prediction: np.ndarray,
+    task_type: int,
+    metrics: Iterable[autoPyTorchMetric],
 ) -> Dict[str, float]:
     score_dict = dict()
     if task_type in REGRESSION_TASKS:
         cprediction = sanitize_array(prediction)
         for metric_ in metrics:
             try:
-                score_dict[metric_.name] = metric_(target, cprediction)
+                score_dict[metric_.name] = metric_._sign * metric_(target, cprediction)
             except ValueError as e:
                 warnings.warn(f"{e} {e.args[0]}")
                 if e.args[0] == "Mean Squared Logarithmic Error cannot be used when " \
@@ -126,7 +126,7 @@ def calculate_score(
     else:
         for metric_ in metrics:
             try:
-                score_dict[metric_.name] = metric_(target, prediction)
+                score_dict[metric_.name] = metric_._sign * metric_(target, prediction)
             except ValueError as e:
                 if e.args[0] == 'multiclass format is not supported':
                     continue
@@ -143,3 +143,49 @@ def calculate_score(
                 else:
                     raise e
     return score_dict
+
+
+def calculate_loss(
+    target: np.ndarray,
+    prediction: np.ndarray,
+    task_type: int,
+    metrics: Iterable[autoPyTorchMetric],
+) -> Dict[str, float]:
+    """
+    Returns a loss (a value that casts the optimization
+    problem as a minimization one) for each of the given metrics
+
+    Parameters
+    ----------
+    target: np.ndarray
+        The ground truth of the targets
+    prediction: np.ndarray
+        The best estimate from the model, of the given targets
+    task_type: int
+        To understand if the problem task is classification
+        or regression
+    metrics: Iterable[autoPyTorchMetric]
+        Objects that host a function to calculate how good the
+        prediction is according to the target
+
+    Returns
+    -------
+    Dict[str, float]
+        A loss for each of the provided metrics
+    """
+    score = calculate_score(
+        target=target,
+        prediction=prediction,
+        task_type=task_type,
+        metrics=metrics,
+    )
+
+    loss_dict = dict()
+    for metric_ in metrics:
+        # TODO: When metrics are annotated with type_of_target support
+        # we can remove this check
+        if metric_.name not in score:
+            continue
+        loss_dict[metric_.name] = metric_._optimum - metric_._sign * score[metric_.name]
+    return loss_dict
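
A hedged usage sketch of the new calculate_loss on a toy binary-classification problem; the accuracy scorer import and the TABULAR_CLASSIFICATION constant are assumed to live at these paths:

import numpy as np

from autoPyTorch.constants import TABULAR_CLASSIFICATION  # assumed location
from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy  # assumed location
from autoPyTorch.pipeline.components.training.metrics.utils import calculate_loss

target = np.array([0, 1, 1, 0])
prediction = np.array([0, 1, 0, 0])  # one of four samples misclassified

loss_dict = calculate_loss(
    target=target,
    prediction=prediction,
    task_type=TABULAR_CLASSIFICATION,
    metrics=[accuracy],
)
print(loss_dict)  # expected: {'accuracy': 0.25}, i.e. optimum(1) - accuracy(0.75)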
4 changes: 2 additions & 2 deletions test/test_api/test_api.py
@@ -83,7 +83,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):
         '.autoPyTorch/ensemble_read_preds.pkl',
         '.autoPyTorch/start_time_1',
         '.autoPyTorch/ensemble_history.json',
-        '.autoPyTorch/ensemble_read_scores.pkl',
+        '.autoPyTorch/ensemble_read_losses.pkl',
         '.autoPyTorch/true_targets_ensemble.npy',
     ]
     for expected_file in expected_files:
@@ -244,7 +244,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
         '.autoPyTorch/ensemble_read_preds.pkl',
         '.autoPyTorch/start_time_1',
         '.autoPyTorch/ensemble_history.json',
-        '.autoPyTorch/ensemble_read_scores.pkl',
+        '.autoPyTorch/ensemble_read_losses.pkl',
         '.autoPyTorch/true_targets_ensemble.npy',
     ]
     for expected_file in expected_files: