Commit cc3c40c

Sync azure-ai-generative with Latest Changes (Azure#32651)

* sync setup.py
* manifest + non-python file removal
* sync generative package contents

1 parent 7ea5f7d commit cc3c40c

Some content is hidden: GitHub collapses large commits by default, so only a subset of the 48 changed files is shown below.

48 files changed: +3948 / -128 lines

sdk/ai/azure-ai-generative/MANIFEST.in

Lines changed: 1 addition & 1 deletion

@@ -4,6 +4,6 @@ include *.md
 include azure/__init__.py
 include azure/ai/__init__.py
 include azure/ai/generative/py.typed
-include azure/ai/generative/operations/component-configs/*
 include azure/ai/generative/index/_utils/encodings/*
 recursive-include azure/ai/generative/synthetic/templates *.txt
+recursive-include azure/ai/generative/synthetic/simulator/templates *.md

sdk/ai/azure-ai-generative/azure/ai/generative/__init__.py

Lines changed: 9 additions & 1 deletion

@@ -2,11 +2,19 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)  # type: ignore
+
+import logging
+
 from ._ai_client import AIClient
-from ._version import VERSION
+from ._telemetry import initialize_logger_info
+
+module_logger = logging.getLogger(__name__)
+initialize_logger_info(module_logger, terminator="\n")
 
 __all__ = [
     "AIClient",
 ]
 
+VERSION = "0.1.0"
 __version__ = VERSION
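The package root now pins VERSION directly and routes module logging through a `_telemetry` helper. That helper's implementation is not part of this commit; as orientation, a function like `initialize_logger_info` typically just attaches a stdout handler with the given terminator. A minimal sketch, assuming that behavior:

    # Sketch only: the real initialize_logger_info lives in the package's
    # _telemetry module, which this commit does not show.
    import logging
    import sys

    def initialize_logger_info(module_logger: logging.Logger, terminator: str = "\n") -> None:
        # Emit INFO-level records to stdout, ending each with the given terminator.
        module_logger.setLevel(logging.INFO)
        module_logger.propagate = False
        handler = logging.StreamHandler(stream=sys.stdout)
        handler.terminator = terminator
        module_logger.addHandler(handler)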

sdk/ai/azure-ai-generative/azure/ai/generative/entities/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -10,5 +10,6 @@
 from .mlindex import MLIndex
 from .project import Project
 from .data import Data
+from .configs import AzureOpenAIModelConfiguration
 
-__all__ = ["Connection", "MLIndex", "Project", "AIResource", "Data"]
+__all__ = ["Connection", "MLIndex", "Project", "AIResource", "Data", "AzureOpenAIModelConfiguration"]

sdk/ai/azure-ai-generative/azure/ai/generative/entities/configs.py (new file; the page omits the filename, inferred here from the `from .configs import AzureOpenAIModelConfiguration` line above)

Lines changed: 39 additions & 0 deletions

@@ -0,0 +1,39 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from dataclasses import dataclass
+from typing import Any, Dict
+from azure.ai.generative.entities.connection import Connection
+from azure.ai.ml._utils.utils import camel_to_snake
+
+
+
+@dataclass
+class AzureOpenAIModelConfiguration:
+    api_base: str
+    api_key: str
+    api_version: str
+    model_name: str
+    deployment_name: str
+    model_kwargs: Dict[str, Any]
+
+    @staticmethod
+    def from_connection(
+        connection: Connection, model_name: str, deployment_name: str, **model_kwargs
+    ) -> 'AzureOpenAIModelConfiguration':
+        if not isinstance(connection, Connection) or camel_to_snake(connection.type) != "azure_open_ai":
+            raise TypeError(
+                "Only AzureOpenAI connection objects are supported."
+            )
+        key = connection.credentials.get("key")
+        if key is None:
+            raise ValueError("Unable to retrieve openai key from connection object.")
+
+        return AzureOpenAIModelConfiguration(
+            api_base=connection.target,
+            api_key=connection.credentials.get("key"),
+            api_version=connection.metadata.get("ApiVersion"),
+            model_name=model_name,
+            deployment_name=deployment_name,
+            model_kwargs=model_kwargs,
+        )
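A quick usage sketch for the new dataclass. The `from_connection` call is exactly as defined above; the connection lookup and all field values are hypothetical placeholders:

    # Hypothetical usage; the connection name and model values are placeholders.
    from azure.ai.generative.entities import AzureOpenAIModelConfiguration

    # e.g. connection = client.connections.get("my-aoai-connection"),
    # assuming an AIClient and an AzureOpenAI-type connection.
    config = AzureOpenAIModelConfiguration.from_connection(
        connection,
        model_name="gpt-35-turbo",
        deployment_name="my-gpt-35-deployment",
        temperature=0.0,  # extra keyword arguments are collected into model_kwargs
    )
    print(config.api_base, config.api_version)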

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_base_handler.py

Lines changed: 0 additions & 3 deletions

@@ -26,9 +26,6 @@ def __init__(self, asset, test_data, prediction_data=None, ground_truth=None, **
 
         self._test_data = test_data_df
 
-        if self._prediction_data is None:
-            self._prediction_data = prediction_data
-
         self.params_dict = kwargs.pop("params_dict", None)
 
     @property

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_evaluate.py

Lines changed: 64 additions & 38 deletions

@@ -21,9 +21,11 @@
 from azure.ai.generative.evaluate._utils import _is_flow, load_jsonl, _get_artifact_dir_path
 from azure.ai.generative.evaluate._mlflow_log_collector import RedirectUserOutputStreams
 
+from ._utils import _write_properties_to_run_history
 
 LOGGER = logging.getLogger(__name__)
 
+
 def _get_handler_class(
     asset,
 ):
@@ -75,55 +77,58 @@ def _log_metrics(run_id, metrics):
 
 def evaluate(
     evaluation_name=None,
-    asset=None,
-    asset_type=None,
+    target=None,
     data=None,
-    truth_data=None,
-    prediction_data=None,
     task_type=None,
-    metrics_config=None,
-    params=None,
-    metrics=None,
+    sweep_args=None,
+    metrics_list=None,
+    model_config=None,
+    data_mapping=None,
     **kwargs
 ):
     results_list = []
+    metrics_config = {}
     if "tracking_uri" in kwargs:
         mlflow.set_tracking_uri(kwargs.get("tracking_uri"))
 
-    if params:
+    if model_config:
+        metrics_config.update({"openai_params": model_config})
+
+    if data_mapping:
+        metrics_config.update(data_mapping)
+
+    if sweep_args:
         import itertools
-        keys, values = zip(*params.items())
+        keys, values = zip(*sweep_args.items())
         params_permutations_dicts = [dict(zip(keys, v)) for v in itertools.product(*values)]
 
         with mlflow.start_run(run_name=evaluation_name) as run:
-            log_param_and_tag("_azureml.evaluation_run", True)
+            log_property_and_tag("_azureml.evaluation_run", "azure-ai-generative")
            for index, params_permutations_dict in enumerate(params_permutations_dicts):
                 evaluation_name_variant = f"{evaluation_name}_{index}" if evaluation_name else f"{run.info.run_name}_{index}"
 
                 evaluation_results = _evaluate(
                     evaluation_name=evaluation_name_variant,
-                    asset=asset,
+                    target=target,
                     data=data,
-                    truth_data=truth_data,
-                    prediction_data=prediction_data,
                     task_type=task_type,
-                    metrics_config=metrics_config,
+                    model_config=model_config,
+                    data_mapping=data_mapping,
                     params_dict=params_permutations_dict,
-                    metrics=metrics,
+                    metrics=metrics_list,
                     **kwargs
                 )
                 results_list.append(evaluation_results)
         return results_list
     else:
         evaluation_result = _evaluate(
             evaluation_name=evaluation_name,
-            asset=asset,
+            target=target,
             data=data,
-            truth_data=truth_data,
-            prediction_data=prediction_data,
             task_type=task_type,
-            metrics_config=metrics_config,
-            metrics=metrics,
+            model_config=model_config,
+            data_mapping=data_mapping,
+            metrics=metrics_list,
             **kwargs
         )
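The public signature is reshaped: `asset` becomes `target`, `params` becomes `sweep_args`, `metrics` becomes `metrics_list`, and `truth_data`/`prediction_data` give way to a `data_mapping` dictionary plus a `model_config`. An assumed call under the new signature; the file path, column names, metric name, and config are placeholders inferred from this diff, not documented API:

    # Assumed usage based only on the signature above; values are placeholders.
    from azure.ai.generative.evaluate import evaluate

    def qa_fn(question=None, **kwargs):
        return {"answer": "stub"}

    results = evaluate(
        evaluation_name="qa-eval",
        target=qa_fn,                       # callable or flow producing predictions
        data="eval_data.jsonl",             # test data (placeholder path)
        task_type="qa",                     # must resolve to a supported constants.Tasks value
        metrics_list=["gpt_groundedness"],  # placeholder metric name
        model_config=model_config,          # e.g. an AzureOpenAIModelConfiguration (not defined here)
        data_mapping={
            "y_pred": "answer",             # consumed by _evaluate as prediction_data
            "y_test": "truth",              # consumed by _evaluate as truth_data
        },
    )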

@@ -132,14 +137,14 @@ def evaluate(
 
 def _evaluate(
     evaluation_name=None,
-    asset=None,
-    asset_type=None,
+    target=None,
     data=None,
     truth_data=None,
     prediction_data=None,
     task_type=None,
-    metrics_config=None,
     metrics=None,
+    data_mapping=None,
+    model_config=None,
     **kwargs
 ):
     try:
@@ -151,23 +156,36 @@
         test_data = data
         _data_is_file = False
 
-        if asset is None and prediction_data is None:
-            raise Exception("asset and prediction data cannot be null")
+        if "y_pred" in data_mapping:
+            prediction_data = data_mapping.get("y_pred")
+
+        if "y_test" in data_mapping:
+            truth_data = data_mapping.get("y_test")
+
+        if target is None and prediction_data is None:
+            raise Exception("target and prediction data cannot be null")
 
         if task_type not in [constants.Tasks.QUESTION_ANSWERING, constants.Tasks.CHAT_COMPLETION]:
             raise Exception(f"task type {task_type} is not supported")
 
-        with mlflow.start_run(nested=True if mlflow.active_run() else False, run_name=evaluation_name) as run,\
-                RedirectUserOutputStreams(logger=LOGGER) as _:
-
-            log_param_and_tag("_azureml.evaluation_run", True)
+        metrics_config = {}
+        if model_config:
+            metrics_config.update({"openai_params": model_config})
+
+        if data_mapping:
+            metrics_config.update(data_mapping)
+
+        with mlflow.start_run(nested=True if mlflow.active_run() else False, run_name=evaluation_name) as run, \
+                RedirectUserOutputStreams(logger=LOGGER) as _:
+
+            log_property_and_tag("_azureml.evaluation_run", "azure-ai-generative")
             # Log input is a preview feature behind an allowlist. Uncomment this line once the feature is broadly available.
             # log_input(data=data, data_is_file=_data_is_file)
 
-            asset_handler_class = _get_handler_class(asset)
+            asset_handler_class = _get_handler_class(target)
 
             asset_handler = asset_handler_class(
-                asset=asset,
+                asset=target,
                 prediction_data=prediction_data,
                 ground_truth=truth_data,
                 test_data=test_data,
@@ -211,7 +229,7 @@ def _get_instance_table():
 
         with tempfile.TemporaryDirectory() as tmpdir:
             for param_name, param_value in kwargs.get("params_dict", {}).items():
-
+
                 try:
                     mlflow.log_param(param_name, param_value)
                 except MlflowException as ex:
@@ -220,8 +238,9 @@ def _get_instance_table():
                     # But since we control how params are logged, this is prob fine for now.
 
                     if ex.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE):
-                        LOGGER.warning(f"Parameter {param_name} value is too long to log. Truncating and logging it as an artifact.")
-
+                        LOGGER.warning(
+                            f"Parameter {param_name} value is too long to log. Truncating and logging it as an artifact.")
+
                         # Truncate the value to 500 bytes and log it.
                         truncated_value = param_value.encode('utf-8')[:500].decode('utf-8', 'ignore')
                         mlflow.log_param(param_name, truncated_value)
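The `encode('utf-8')[:500].decode('utf-8', 'ignore')` idiom enforces a byte budget without emitting a broken character: the byte slice may cut a multi-byte sequence in half, and `errors='ignore'` drops the dangling fragment. A standalone illustration:

    # Byte-budget truncation that never splits a multi-byte character.
    value = "résumé " * 100  # each "é" is two bytes in UTF-8
    truncated = value.encode("utf-8")[:500].decode("utf-8", "ignore")
    assert len(truncated.encode("utf-8")) <= 500
    print(truncated[-10:])  # clean text even if the slice landed mid-character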
@@ -237,20 +256,22 @@ def _get_instance_table():
             eval_artifact_df = _get_instance_table().to_json(orient="records", lines=True, force_ascii=False)
             tmp_path = os.path.join(tmpdir, "eval_results.jsonl")
 
-            with open(tmp_path, "w") as f:
+            with open(tmp_path, "w", encoding="utf-8") as f:
                 f.write(eval_artifact_df)
 
             mlflow.log_artifact(tmp_path)
-            log_param_and_tag("_azureml.evaluate_artifacts", json.dumps([{"path": "eval_results.jsonl", "type": "table"}]))
+            log_property_and_tag("_azureml.evaluate_artifacts",
+                                 json.dumps([{"path": "eval_results.jsonl", "type": "table"}]))
             mlflow.log_param("task_type", task_type)
             log_param_and_tag("_azureml.evaluate_metric_mapping", json.dumps(metrics_handler._metrics_mapping_to_log))
 
     return metrics
 
+
 def log_input(data, data_is_file):
     try:
-        # Mlflow service supports only uri_folder, hence this is need to create a dir to log input data.
-        # once support is extended, we can revisit this logic
+        # Mlflow service supports only uri_folder, hence this is need to create a dir to log input data.
+        # once support is extended, we can revisit this logic
         with tempfile.TemporaryDirectory() as tempdir:
             if data_is_file:
                 file_name = os.path.basename(data)
@@ -271,6 +292,11 @@ def log_input(data, data_is_file):
         LOGGER.error("Error logging data as dataset, continuing without it")
         LOGGER.exception(ex, stack_info=True)
 
+
 def log_param_and_tag(key, value):
     mlflow.log_param(key, value)
     mlflow.set_tag(key, value)
+
+
+def log_property_and_tag(key, value, logger=LOGGER):
+    _write_properties_to_run_history({key: value}, logger)
+    mlflow.set_tag(key, value)

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_local_code_handler.py

Lines changed: 3 additions & 5 deletions

@@ -26,10 +26,8 @@ def generate_prediction_data(self):
         # TODO: Check if this is the right place for this logic
         prediction_data = []
        test_data = self.get_test_data_as_jsonl()
-        try:
-            prediction_data = self.asset(test_data)
-        except Exception as ex:
-            for d in test_data:
-                prediction_data.append(self.asset(**d))
 
+        for d in test_data:
+            prediction_data.append(self.asset(**d))
+
         return prediction_data
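With the batch-call fallback removed, the local handler now always invokes the target once per record, unpacking each JSONL row as keyword arguments. Any callable used as a local target therefore needs a row-wise signature; a hypothetical example, where the column names are placeholders for whatever the test data contains:

    # Hypothetical row-wise target; "question"/"context" are placeholder columns.
    def qa_fn(question=None, context=None, **extra):
        # A record like {"question": "...", "context": "..."} arrives as
        # qa_fn(question="...", context="...").
        return {"answer": f"stub answer for: {question}"}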

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_metric_handler.py

Lines changed: 1 addition & 0 deletions

@@ -81,5 +81,6 @@ def calculate_metrics(self):
         return compute_metrics(
             metrics=self.metrics,
             task_type=self.task_type,
+            use_chat_completion=True,
             **metrics_calculation_data
         )

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_mlflow_log_collector.py

Lines changed: 3 additions & 2 deletions

@@ -54,5 +54,6 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         mlflow.log_artifact(self.user_log_path, "user_logs")
 
         self.user_log_fp.close()
-        os.remove(self.user_log_path)
-        self.logger.debug("User scope execution complete.")
+        # Commenting this out due to a bug where file is held by another process causing delete to fail
+        # os.remove(self.user_log_path)
+        self.logger.debug("User scope execution complete.")
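Note the fix disables deletion outright rather than tolerating the failure. If cleanup were still desired, one alternative would be to catch the sharing-violation error inside `__exit__`; a sketch under that assumption, not what the commit does:

    # Sketch of fault-tolerant cleanup inside __exit__ (the commit instead skips deletion).
    try:
        os.remove(self.user_log_path)
    except OSError:
        # On Windows a file still held by another process raises PermissionError,
        # an OSError subclass; leave the log behind rather than crash the exit path.
        self.logger.debug("Could not remove %s; leaving it in place.", self.user_log_path)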

sdk/ai/azure-ai-generative/azure/ai/generative/evaluate/_utils.py

Lines changed: 26 additions & 2 deletions

@@ -78,5 +78,29 @@ def _get_artifact_dir_path(path):
     return f"azureml://datastores/{datastore}/paths/{file_path}"
 
 
-
-
+def _write_properties_to_run_history(properties: dict, logger) -> None:
+    import mlflow
+    from mlflow.tracking import MlflowClient
+    from mlflow.utils.rest_utils import http_request
+
+    # get mlflow run
+    run = mlflow.active_run()
+    if run is None:
+        run = mlflow.start_run()
+    # get auth from client
+    client = MlflowClient()
+    try:
+        cred = client._tracking_client.store.get_host_creds()  # pylint: disable=protected-access
+        # update host to run history and request PATCH API
+        cred.host = cred.host.replace("mlflow/v2.0", "mlflow/v1.0").replace("mlflow/v1.0", "history/v1.0")
+        response = http_request(
+            host_creds=cred,
+            endpoint=f"/experimentids/{run.info.experiment_id}/runs/{run.info.run_id}",
+            method="PATCH",
+            json={"runId": run.info.run_id, "properties": properties},
+        )
+        if response.status_code != 200:
+            logger.error("Fail writing properties '%s' to run history: %s", properties, response.text)
+            response.raise_for_status()
+    except AttributeError as e:
+        logger.error("Fail writing properties '%s' to run history: %s", properties, e)
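The chained `replace` calls normalize either MLflow API version in the credential host and then swap the path segment for the run-history endpoint. A standalone illustration of the string rewrite (the hostname is a placeholder):

    # Placeholder hostname; only the path rewrite matters.
    host = "https://region.api.azureml.ms/mlflow/v2.0/subscriptions/sub/rest-of-path"
    host = host.replace("mlflow/v2.0", "mlflow/v1.0").replace("mlflow/v1.0", "history/v1.0")
    print(host)  # https://region.api.azureml.ms/history/v1.0/subscriptions/sub/rest-of-path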
