 from azure.ai.generative.evaluate._utils import _is_flow, load_jsonl, _get_artifact_dir_path
 from azure.ai.generative.evaluate._mlflow_log_collector import RedirectUserOutputStreams
 
+from ._utils import _write_properties_to_run_history
 
 LOGGER = logging.getLogger(__name__)
 
+
 def _get_handler_class(
     asset,
 ):
@@ -75,55 +77,58 @@ def _log_metrics(run_id, metrics):
 
 def evaluate(
     evaluation_name=None,
-    asset=None,
-    asset_type=None,
+    target=None,
     data=None,
-    truth_data=None,
-    prediction_data=None,
     task_type=None,
-    metrics_config=None,
-    params=None,
-    metrics=None,
+    sweep_args=None,
+    metrics_list=None,
+    model_config=None,
+    data_mapping=None,
     **kwargs
 ):
     results_list = []
+    metrics_config = {}
     if "tracking_uri" in kwargs:
         mlflow.set_tracking_uri(kwargs.get("tracking_uri"))
 
-    if params:
+    if model_config:
+        metrics_config.update({"openai_params": model_config})
+
+    if data_mapping:
+        metrics_config.update(data_mapping)
+
+    if sweep_args:
         import itertools
-        keys, values = zip(*params.items())
+        keys, values = zip(*sweep_args.items())
         params_permutations_dicts = [dict(zip(keys, v)) for v in itertools.product(*values)]
 
         with mlflow.start_run(run_name=evaluation_name) as run:
-            log_param_and_tag("_azureml.evaluation_run", True)
+            log_property_and_tag("_azureml.evaluation_run", "azure-ai-generative")
             for index, params_permutations_dict in enumerate(params_permutations_dicts):
                 evaluation_name_variant = f"{evaluation_name}_{index}" if evaluation_name else f"{run.info.run_name}_{index}"
 
                 evaluation_results = _evaluate(
                     evaluation_name=evaluation_name_variant,
-                    asset=asset,
+                    target=target,
                     data=data,
-                    truth_data=truth_data,
-                    prediction_data=prediction_data,
                     task_type=task_type,
-                    metrics_config=metrics_config,
+                    model_config=model_config,
+                    data_mapping=data_mapping,
                     params_dict=params_permutations_dict,
-                    metrics=metrics,
+                    metrics=metrics_list,
                     **kwargs
                 )
                 results_list.append(evaluation_results)
             return results_list
     else:
         evaluation_result = _evaluate(
             evaluation_name=evaluation_name,
-            asset=asset,
+            target=target,
             data=data,
-            truth_data=truth_data,
-            prediction_data=prediction_data,
             task_type=task_type,
-            metrics_config=metrics_config,
-            metrics=metrics,
+            model_config=model_config,
+            data_mapping=data_mapping,
+            metrics=metrics_list,
             **kwargs
         )
 
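For context on the new `sweep_args` parameter: when it is supplied, `evaluate` expands it into one child `_evaluate` run per combination of values via the `itertools.product` pattern in the hunk above. A minimal standalone sketch of that expansion (the parameter names and values here are illustrative, not from this commit):

```python
import itertools

# Hypothetical sweep: any mapping of parameter name -> list of candidate values.
sweep_args = {"temperature": [0.0, 0.7], "max_tokens": [256, 512]}

keys, values = zip(*sweep_args.items())
params_permutations_dicts = [dict(zip(keys, v)) for v in itertools.product(*values)]

# Four permutations, i.e. four child runs named "<evaluation_name>_<index>":
# [{'temperature': 0.0, 'max_tokens': 256}, {'temperature': 0.0, 'max_tokens': 512},
#  {'temperature': 0.7, 'max_tokens': 256}, {'temperature': 0.7, 'max_tokens': 512}]
```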
@@ -132,14 +137,14 @@ def evaluate(
 
 def _evaluate(
     evaluation_name=None,
-    asset=None,
-    asset_type=None,
+    target=None,
     data=None,
     truth_data=None,
     prediction_data=None,
     task_type=None,
-    metrics_config=None,
     metrics=None,
+    data_mapping=None,
+    model_config=None,
     **kwargs
 ):
     try:
@@ -151,23 +156,36 @@ def _evaluate(
         test_data = data
         _data_is_file = False
 
-    if asset is None and prediction_data is None:
-        raise Exception("asset and prediction data cannot be null")
+    if "y_pred" in data_mapping:
+        prediction_data = data_mapping.get("y_pred")
+
+    if "y_test" in data_mapping:
+        truth_data = data_mapping.get("y_test")
+
+    if target is None and prediction_data is None:
+        raise Exception("target and prediction data cannot be null")
 
     if task_type not in [constants.Tasks.QUESTION_ANSWERING, constants.Tasks.CHAT_COMPLETION]:
         raise Exception(f"task type {task_type} is not supported")
 
-    with mlflow.start_run(nested=True if mlflow.active_run() else False, run_name=evaluation_name) as run,\
-            RedirectUserOutputStreams(logger=LOGGER) as _:
-
-        log_param_and_tag("_azureml.evaluation_run", True)
+    metrics_config = {}
+    if model_config:
+        metrics_config.update({"openai_params": model_config})
+
+    if data_mapping:
+        metrics_config.update(data_mapping)
+
+    with mlflow.start_run(nested=True if mlflow.active_run() else False, run_name=evaluation_name) as run, \
+            RedirectUserOutputStreams(logger=LOGGER) as _:
+
+        log_property_and_tag("_azureml.evaluation_run", "azure-ai-generative")
         # Log input is a preview feature behind an allowlist. Uncomment this line once the feature is broadly available.
         # log_input(data=data, data_is_file=_data_is_file)
 
-        asset_handler_class = _get_handler_class(asset)
+        asset_handler_class = _get_handler_class(target)
 
         asset_handler = asset_handler_class(
-            asset=asset,
+            asset=target,
             prediction_data=prediction_data,
             ground_truth=truth_data,
             test_data=test_data,
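Per this hunk, `data_mapping` now does double duty: the whole mapping is merged into `metrics_config`, and the reserved keys `y_pred` and `y_test`, when present, replace the old explicit `prediction_data` and `truth_data` arguments. (Note the membership tests assume a dict is passed; with the default `data_mapping=None` they would raise a `TypeError`.) A hedged sketch of the expected shape, with illustrative column names:

```python
# Column names ("answer", "truth") are illustrative, not from this commit.
data_mapping = {
    "y_pred": "answer",  # column in the test data holding model predictions
    "y_test": "truth",   # column holding ground-truth labels
}

# _evaluate then derives:
prediction_data = data_mapping.get("y_pred")  # -> "answer"
truth_data = data_mapping.get("y_test")       # -> "truth"
```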
@@ -211,7 +229,7 @@ def _get_instance_table():
 
         with tempfile.TemporaryDirectory() as tmpdir:
             for param_name, param_value in kwargs.get("params_dict", {}).items():
-
+
                 try:
                     mlflow.log_param(param_name, param_value)
                 except MlflowException as ex:
@@ -220,8 +238,9 @@ def _get_instance_table():
                     # But since we control how params are logged, this is prob fine for now.
 
                     if ex.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE):
-                        LOGGER.warning(f"Parameter {param_name} value is too long to log. Truncating and logging it as an artifact.")
-
+                        LOGGER.warning(
+                            f"Parameter {param_name} value is too long to log. Truncating and logging it as an artifact.")
+
                         # Truncate the value to 500 bytes and log it.
                         truncated_value = param_value.encode('utf-8')[:500].decode('utf-8', 'ignore')
                         mlflow.log_param(param_name, truncated_value)
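The truncation path slices the UTF-8 byte representation at 500 bytes and decodes with `errors='ignore'`, so a multibyte character split at the boundary is silently dropped rather than raising `UnicodeDecodeError`. A quick standalone check of that behavior (the value is illustrative):

```python
value = "€" * 200  # "€" is 3 bytes in UTF-8, so 600 bytes total

truncated = value.encode("utf-8")[:500].decode("utf-8", "ignore")

# 500 bytes covers 166 whole characters (498 bytes) plus 2 dangling bytes;
# "ignore" drops the dangling bytes instead of raising.
assert len(truncated) == 166
assert len(truncated.encode("utf-8")) == 498
```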
@@ -237,20 +256,22 @@ def _get_instance_table():
             eval_artifact_df = _get_instance_table().to_json(orient="records", lines=True, force_ascii=False)
             tmp_path = os.path.join(tmpdir, "eval_results.jsonl")
 
-            with open(tmp_path, "w") as f:
+            with open(tmp_path, "w", encoding="utf-8") as f:
                 f.write(eval_artifact_df)
 
             mlflow.log_artifact(tmp_path)
-            log_param_and_tag("_azureml.evaluate_artifacts", json.dumps([{"path": "eval_results.jsonl", "type": "table"}]))
+            log_property_and_tag("_azureml.evaluate_artifacts",
+                                 json.dumps([{"path": "eval_results.jsonl", "type": "table"}]))
             mlflow.log_param("task_type", task_type)
             log_param_and_tag("_azureml.evaluate_metric_mapping", json.dumps(metrics_handler._metrics_mapping_to_log))
 
         return metrics
 
+
 def log_input(data, data_is_file):
     try:
-        # Mlflow service supports only uri_folder, hence this is need to create a dir to log input data.
-        # once support is extended, we can revisit this logic
+        # The MLflow service supports only uri_folder, hence we need to create a dir to log input data.
+        # Once support is extended, we can revisit this logic.
         with tempfile.TemporaryDirectory() as tempdir:
             if data_is_file:
                 file_name = os.path.basename(data)
@@ -271,6 +292,11 @@ def log_input(data, data_is_file):
         LOGGER.error("Error logging data as dataset, continuing without it")
         LOGGER.exception(ex, stack_info=True)
 
+
 def log_param_and_tag(key, value):
     mlflow.log_param(key, value)
     mlflow.set_tag(key, value)
+
+def log_property_and_tag(key, value, logger=LOGGER):
+    _write_properties_to_run_history({key: value}, logger)
+    mlflow.set_tag(key, value)
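The new `log_property_and_tag` helper mirrors `log_param_and_tag`, but writes the key/value pair to run history as a property (via the `_write_properties_to_run_history` utility imported at the top of this diff) instead of as an MLflow param; the tag half is unchanged. A hedged usage sketch, with illustrative values:

```python
# Param + tag: the value appears under the run's MLflow params.
log_param_and_tag("task_type", "qa")

# Run-history property + tag: written to Azure ML run history,
# outside the MLflow param store, as done for the markers in this commit.
log_property_and_tag("_azureml.evaluation_run", "azure-ai-generative")
```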