44# Copyright (c) 2023 Oracle and/or its affiliates.
55# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66
import pandas as pd

from ads.opctl import logger
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
from ads.opctl.operator.lowcode.common.data import AbstractData
from ads.opctl.operator.lowcode.common.utils import (
    default_signer,
    load_data,
    merge_category_columns,
)

from ..operator_config import AnomalyOperatorSpec
1319
1420
class AnomalyData(AbstractData):
    """Primary input dataset for the anomaly operator.

    Thin wrapper: loading/validation is delegated to ``AbstractData``
    under the fixed dataset name ``"input_data"``.
    """

    def __init__(self, spec: AnomalyOperatorSpec):
        super().__init__(name="input_data", spec=spec)
25+
class TestData(AbstractData):
    """Optional held-out test dataset for the anomaly operator.

    Thin wrapper: loading/validation is delegated to ``AbstractData``
    under the fixed dataset name ``"test_data"``.
    """

    def __init__(self, spec: AnomalyOperatorSpec):
        super().__init__(name="test_data", spec=spec)
30+
class AnomalyDatasets:
    """Container for the datasets consumed by the anomaly operator."""

    def __init__(self, spec: AnomalyOperatorSpec):
        """Instantiates the DataIO instance.

        Parameters
        ----------
        spec: AnomalyOperatorSpec
            The anomaly operator spec.
        """
        # Loading, datetime parsing, and per-series splitting are all
        # delegated to AnomalyData (an AbstractData subclass).
        self._data = AnomalyData(spec)
        # Long-format frame containing every series of the input data.
        self.data = self._data.get_data_long()
        # Mapping of series identifier -> per-series DataFrame.
        self.full_data_dict = self._data.get_dict_by_series()
7492
7593
7694class AnomalyOutput :
@@ -93,11 +111,7 @@ def get_inliers_by_cat(self, category: str, data: pd.DataFrame):
93111 inlier_indices = anomaly .index [anomaly [OutputColumns .ANOMALY_COL ] == 0 ]
94112 inliers = data .iloc [inlier_indices ]
95113 if scores is not None and not scores .empty :
96- inliers = pd .merge (
97- inliers ,
98- scores ,
99- on = self .date_column ,
100- how = 'inner' )
114+ inliers = pd .merge (inliers , scores , on = self .date_column , how = "inner" )
101115 return inliers
102116
103117 def get_outliers_by_cat (self , category : str , data : pd .DataFrame ):
@@ -106,11 +120,7 @@ def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
106120 outliers_indices = anomaly .index [anomaly [OutputColumns .ANOMALY_COL ] == 1 ]
107121 outliers = data .iloc [outliers_indices ]
108122 if scores is not None and not scores .empty :
109- outliers = pd .merge (
110- outliers ,
111- scores ,
112- on = self .date_column ,
113- how = 'inner' )
123+ outliers = pd .merge (outliers , scores , on = self .date_column , how = "inner" )
114124 return outliers
115125
116126 def get_inliers (self , data ):
@@ -120,9 +130,12 @@ def get_inliers(self, data):
120130 inliers = pd .concat (
121131 [
122132 inliers ,
123- self .get_inliers_by_cat (
124- category , data [data ['__Series__' ] == category ].reset_index (drop = True ).drop ('__Series__' , axis = 1 )
125- )
133+ self .get_inliers_by_cat (
134+ category ,
135+ data [data [OutputColumns .Series ] == category ]
136+ .reset_index (drop = True )
137+ .drop (OutputColumns .Series , axis = 1 ),
138+ ),
126139 ],
127140 axis = 0 ,
128141 ignore_index = True ,
@@ -137,8 +150,11 @@ def get_outliers(self, data):
137150 [
138151 outliers ,
139152 self .get_outliers_by_cat (
140- category , data [data ['__Series__' ] == category ].reset_index (drop = True ).drop ('__Series__' , axis = 1 )
141- )
153+ category ,
154+ data [data [OutputColumns .Series ] == category ]
155+ .reset_index (drop = True )
156+ .drop (OutputColumns .Series , axis = 1 ),
157+ ),
142158 ],
143159 axis = 0 ,
144160 ignore_index = True ,
0 commit comments