Skip to content

Commit 9a12097

Browse files
vertex-sdk-botcopybara-github
authored andcommitted
feat: Feature Store - Support returning stats in get feature
PiperOrigin-RevId: 705152184
1 parent adf24ac commit 9a12097

File tree

5 files changed

+149
-3
lines changed

5 files changed

+149
-3
lines changed

tests/unit/vertexai/feature_store_constants.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,3 +459,19 @@
459459
labels=_TEST_FG1_FMJ2_LABELS,
460460
)
461461
_TEST_FG1_FMJ_LIST = [_TEST_FG1_FMJ1, _TEST_FG1_FMJ2]
462+
463+
_TEST_FG1_F1_FEATURE_STATS_AND_ANOMALY = types.feature_monitor.FeatureStatsAndAnomaly(
464+
feature_id="my_fg1_f1",
465+
distribution_deviation=0.5,
466+
drift_detection_threshold=0.4,
467+
drift_detected=True,
468+
feature_monitor_job_id="1234567890",
469+
feature_monitor_id="1234567891",
470+
)
471+
_TEST_FG1_F1_WITH_STATS = types.feature_v1beta1.Feature(
472+
name=_TEST_FG1_F1_PATH,
473+
description=_TEST_FG1_F1_DESCRIPTION,
474+
labels=_TEST_FG1_F1_LABELS,
475+
point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT,
476+
feature_stats_and_anomaly=[_TEST_FG1_F1_FEATURE_STATS_AND_ANOMALY],
477+
)

tests/unit/vertexai/test_feature.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,28 @@
1616
#
1717

1818
import re
19-
from typing import Dict, Optional
19+
from typing import Dict, List, Optional
2020
from unittest import mock
2121
from unittest.mock import call, patch
2222

2323
from google.api_core import operation as ga_operation
2424
from google.cloud import aiplatform
2525
from google.cloud.aiplatform import base
26+
from google.cloud.aiplatform.compat import types
2627
from google.cloud.aiplatform.compat.services import (
2728
feature_registry_service_client,
2829
)
30+
from google.cloud.aiplatform_v1beta1.services.feature_registry_service import (
31+
FeatureRegistryServiceClient,
32+
)
2933
from feature_store_constants import (
3034
_TEST_FG1_F1_DESCRIPTION,
35+
_TEST_FG1_F1_FEATURE_STATS_AND_ANOMALY,
3136
_TEST_FG1_F1_ID,
3237
_TEST_FG1_F1_LABELS,
3338
_TEST_FG1_F1_PATH,
3439
_TEST_FG1_F1_POINT_OF_CONTACT,
40+
_TEST_FG1_F1_WITH_STATS,
3541
_TEST_FG1_F2_DESCRIPTION,
3642
_TEST_FG1_F2_ID,
3743
_TEST_FG1_F2_LABELS,
@@ -60,6 +66,16 @@ def delete_feature_mock():
6066
yield delete_feature_mock
6167

6268

69+
@pytest.fixture
70+
def get_feature_with_stats_and_anomalies_mock():
71+
with patch.object(
72+
FeatureRegistryServiceClient,
73+
"get_feature",
74+
) as get_feature_with_stats_and_anomalies_mock:
75+
get_feature_with_stats_and_anomalies_mock.return_value = _TEST_FG1_F1_WITH_STATS
76+
yield get_feature_with_stats_and_anomalies_mock
77+
78+
6379
pytestmark = pytest.mark.usefixtures("google_auth_mock")
6480

6581

@@ -73,6 +89,9 @@ def feature_eq(
7389
labels: Dict[str, str],
7490
point_of_contact: str,
7591
version_column_name: Optional[str] = None,
92+
feature_stats_and_anomalies: Optional[
93+
List[types.feature_monitor.FeatureStatsAndAnomaly]
94+
] = None,
7695
):
7796
"""Check if a Feature has the appropriate values set."""
7897
assert feature_to_check.name == name
@@ -85,6 +104,10 @@ def feature_eq(
85104

86105
if version_column_name:
87106
assert feature_to_check.version_column_name == version_column_name
107+
if feature_stats_and_anomalies:
108+
assert (
109+
feature_to_check.feature_stats_and_anomalies == feature_stats_and_anomalies
110+
)
88111

89112

90113
def test_init_with_feature_id_and_no_fg_id_raises_error(get_feature_mock):
@@ -206,6 +229,33 @@ def test_init_with_feature_path_for_explicit_version_column(
206229
)
207230

208231

232+
def test_init_with_latest_stats_count(get_feature_with_stats_and_anomalies_mock):
233+
aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
234+
235+
feature = Feature(name=_TEST_FG1_F1_PATH, latest_stats_count=1)
236+
237+
get_feature_with_stats_and_anomalies_mock.assert_called_once_with(
238+
request=types.featurestore_service_v1beta1.GetFeatureRequest(
239+
name=_TEST_FG1_F1_PATH,
240+
feature_stats_and_anomaly_spec=types.feature_monitor.FeatureStatsAndAnomalySpec(
241+
latest_stats_count=1
242+
),
243+
)
244+
)
245+
246+
feature_eq(
247+
feature,
248+
name=_TEST_FG1_F1_ID,
249+
resource_name=_TEST_FG1_F1_PATH,
250+
project=_TEST_PROJECT,
251+
location=_TEST_LOCATION,
252+
description=_TEST_FG1_F1_DESCRIPTION,
253+
labels=_TEST_FG1_F1_LABELS,
254+
point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT,
255+
feature_stats_and_anomalies=[_TEST_FG1_F1_FEATURE_STATS_AND_ANOMALY],
256+
)
257+
258+
209259
@pytest.mark.parametrize("sync", [True])
210260
def test_delete_feature(
211261
get_fg_mock, get_feature_mock, delete_feature_mock, base_logger_mock, sync

tests/unit/vertexai/test_feature_group.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@
7070
_TEST_FG1_F1_DESCRIPTION,
7171
_TEST_FG1_F1_LABELS,
7272
_TEST_FG1_F1_POINT_OF_CONTACT,
73+
_TEST_FG1_F1_WITH_STATS,
74+
_TEST_FG1_F1_FEATURE_STATS_AND_ANOMALY,
7375
_TEST_FG1_F2,
7476
_TEST_FG1_F2_ID,
7577
_TEST_FG1_F2_PATH,
@@ -201,6 +203,16 @@ def list_feature_monitors_mock():
201203
yield list_feature_monitors_mock
202204

203205

206+
@pytest.fixture
207+
def get_feature_with_stats_and_anomalies_mock():
208+
with patch.object(
209+
FeatureRegistryServiceClient,
210+
"get_feature",
211+
) as get_feature_with_stats_and_anomalies_mock:
212+
get_feature_with_stats_and_anomalies_mock.return_value = _TEST_FG1_F1_WITH_STATS
213+
yield get_feature_with_stats_and_anomalies_mock
214+
215+
204216
@pytest.fixture()
205217
def mock_base_instantiate_client():
206218
with patch.object(
@@ -452,6 +464,36 @@ def test_get_feature(get_fg_mock, get_feature_mock):
452464
)
453465

454466

467+
def test_get_feature_with_latest_stats_count(
468+
get_fg_mock, get_feature_with_stats_and_anomalies_mock
469+
):
470+
aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
471+
472+
fg = FeatureGroup(_TEST_FG1_ID)
473+
feature = fg.get_feature(_TEST_FG1_F1_ID, latest_stats_count=1)
474+
475+
get_feature_with_stats_and_anomalies_mock.assert_called_once_with(
476+
request=types.featurestore_service_v1beta1.GetFeatureRequest(
477+
name=_TEST_FG1_F1_PATH,
478+
feature_stats_and_anomaly_spec=types.feature_monitor.FeatureStatsAndAnomalySpec(
479+
latest_stats_count=1
480+
),
481+
)
482+
)
483+
484+
feature_eq(
485+
feature,
486+
name=_TEST_FG1_F1_ID,
487+
resource_name=_TEST_FG1_F1_PATH,
488+
project=_TEST_PROJECT,
489+
location=_TEST_LOCATION,
490+
description=_TEST_FG1_F1_DESCRIPTION,
491+
labels=_TEST_FG1_F1_LABELS,
492+
point_of_contact=_TEST_FG1_F1_POINT_OF_CONTACT,
493+
feature_stats_and_anomalies=[_TEST_FG1_F1_FEATURE_STATS_AND_ANOMALY],
494+
)
495+
496+
455497
def test_get_feature_credentials_set_in_init(mock_base_instantiate_client):
456498
credentials = mock.MagicMock(spec=auth_credentials.Credentials)
457499
aiplatform.init(

vertexai/resources/preview/feature_store/feature.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,15 @@
1616
#
1717

1818
import re
19-
from typing import Optional
19+
from typing import List, Optional
2020
from google.auth import credentials as auth_credentials
2121
from google.cloud.aiplatform import base
2222
from google.cloud.aiplatform import utils
2323
from google.cloud.aiplatform.compat.types import (
2424
feature as gca_feature,
25+
feature_monitor_v1beta1 as gca_feature_monitor,
26+
feature_v1beta1 as gca_feature_v1beta1,
27+
featurestore_service_v1beta1 as gca_featurestore_service_v1beta1,
2528
)
2629

2730

@@ -44,6 +47,7 @@ def __init__(
4447
feature_group_id: Optional[str] = None,
4548
project: Optional[str] = None,
4649
location: Optional[str] = None,
50+
latest_stats_count: Optional[int] = None,
4751
credentials: Optional[auth_credentials.Credentials] = None,
4852
):
4953
"""Retrieves an existing managed feature.
@@ -62,6 +66,9 @@ def __init__(
6266
location:
6367
Location to retrieve feature from. If not set, the location set
6468
in aiplatform.init will be used.
69+
gca_feature_arg:
70+
The GCA feature object.
71+
Only set when calling from get_feature with latest_stats_count set.
6572
credentials:
6673
Custom credentials to use to retrieve this feature. Overrides
6774
credentials set in aiplatform.init.
@@ -102,7 +109,24 @@ def __init__(
102109

103110
feature = f"{feature_group_path}/features/{name}"
104111

105-
self._gca_resource = self._get_gca_resource(resource_name=feature)
112+
if latest_stats_count is not None:
113+
api_client = self.__class__._instantiate_client(
114+
location=location, credentials=credentials
115+
)
116+
117+
feature_obj: gca_feature_v1beta1.Feature = api_client.select_version(
118+
"v1beta1"
119+
).get_feature(
120+
request=gca_featurestore_service_v1beta1.GetFeatureRequest(
121+
name=f"{feature}",
122+
feature_stats_and_anomaly_spec=gca_feature_monitor.FeatureStatsAndAnomalySpec(
123+
latest_stats_count=latest_stats_count
124+
),
125+
)
126+
)
127+
self._gca_resource = feature_obj
128+
else:
129+
self._gca_resource = self._get_gca_resource(resource_name=feature)
106130

107131
@property
108132
def version_column_name(self) -> str:
@@ -118,3 +142,10 @@ def description(self) -> str:
118142
def point_of_contact(self) -> str:
119143
"""The point of contact for the feature."""
120144
return self._gca_resource.point_of_contact
145+
146+
@property
147+
def feature_stats_and_anomalies(
148+
self,
149+
) -> List[gca_feature_monitor.FeatureStatsAndAnomaly]:
150+
"""The number of latest stats to return. Only present when gca_feature is set."""
151+
return self._gca_resource.feature_stats_and_anomaly

vertexai/resources/preview/feature_store/feature_group.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ def delete(self, force: bool = False, sync: bool = True) -> None:
239239
def get_feature(
240240
self,
241241
feature_id: str,
242+
latest_stats_count: Optional[int] = None,
242243
credentials: Optional[auth_credentials.Credentials] = None,
243244
) -> Feature:
244245
"""Retrieves an existing managed feature.
@@ -257,6 +258,12 @@ def get_feature(
257258
credentials = (
258259
credentials or self.credentials or initializer.global_config.credentials
259260
)
261+
if latest_stats_count is not None:
262+
return Feature(
263+
name=f"{self.resource_name}/features/{feature_id}",
264+
latest_stats_count=latest_stats_count,
265+
credentials=credentials,
266+
)
260267
return Feature(
261268
f"{self.resource_name}/features/{feature_id}", credentials=credentials
262269
)

0 commit comments

Comments
 (0)