11import collections
22import threading
3- from typing import Optional
3+ from typing import Optional , Union
44
55from .retryable_logs import RetryableLogs
66from .evaluation_details import EvaluationDetails
77from .evaluator import _ConfigEvaluation
88from .statsig_event import StatsigEvent
9+ # from .statsig_user import StatsigUser
910from .layer import Layer
1011from .utils import logger
1112from .thread_util import spawn_background_thread , THREAD_JOIN_TIMEOUT
1415_LAYER_EXPOSURE_EVENT = "statsig::layer_exposure"
1516_GATE_EXPOSURE_EVENT = "statsig::gate_exposure"
1617
18+ _IGNORED_METADATA_KEYS = {'serverTime' , 'configSyncTime' , 'initTime' , 'reason' }
1719
1820def _safe_add_evaluation_to_event (
19- evaluation_details : EvaluationDetails , event : StatsigEvent ):
20- if evaluation_details is None :
21+ evaluation_details : Union [ EvaluationDetails , None ] , event : StatsigEvent ):
22+ if evaluation_details is None or event is None or event . metadata is None :
2123 return
2224
2325 event .metadata ["reason" ] = evaluation_details .reason
@@ -29,10 +31,12 @@ def _safe_add_evaluation_to_event(
2931class _StatsigLogger :
3032 _background_flush : Optional [threading .Thread ]
3133 _background_retry : Optional [threading .Thread ]
34+ _background_deduper : Optional [threading .Thread ]
3235
3336 def __init__ (self , net , shutdown_event , statsig_metadata , error_boundary , options ):
3437 self ._events = []
3538 self ._retry_logs = collections .deque (maxlen = 10 )
39+ self ._deduper = set ()
3640 self ._net = net
3741 self ._statsig_metadata = statsig_metadata
3842 self ._local_mode = options .local_mode
@@ -44,6 +48,7 @@ def __init__(self, net, shutdown_event, statsig_metadata, error_boundary, option
4448 self ._shutdown_event = shutdown_event
4549 self ._background_flush = None
4650 self ._background_retry = None
51+ self ._background_deduper = None
4752 self .spawn_bg_threads_if_needed ()
4853
4954 def spawn_bg_threads_if_needed (self ):
@@ -58,6 +63,10 @@ def spawn_bg_threads_if_needed(self):
5863 self ._background_retry = spawn_background_thread (
5964 self ._periodic_retry , (self ._shutdown_event ,), self ._error_boundary )
6065
66+ if self ._background_deduper is None or not self ._background_deduper .is_alive ():
67+ self ._background_deduper = spawn_background_thread (
68+ self ._periodic_dedupe_clear , (self ._shutdown_event ,), self ._error_boundary )
69+
6170 def log (self , event ):
6271 if self ._local_mode :
6372 return
@@ -73,6 +82,9 @@ def log_gate_exposure(self, user, gate, value, rule_id, secondary_exposures,
7382 "gateValue" : "true" if value else "false" ,
7483 "ruleID" : rule_id ,
7584 }
85+ if not self ._is_unique_exposure (user , _GATE_EXPOSURE_EVENT , event .metadata ):
86+ return
87+
7688 if is_manual_exposure :
7789 event .metadata ["isManualExposure" ] = "true"
7890
@@ -90,6 +102,8 @@ def log_config_exposure(self, user, config, rule_id, secondary_exposures,
90102 "config" : config ,
91103 "ruleID" : rule_id ,
92104 }
105+ if not self ._is_unique_exposure (user , _CONFIG_EXPOSURE_EVENT , event .metadata ):
106+ return
93107 if is_manual_exposure :
94108 event .metadata ["isManualExposure" ] = "true"
95109
@@ -111,20 +125,24 @@ def log_layer_exposure(self, user, layer: Layer, parameter_name: str,
111125 exposures = config_evaluation .secondary_exposures
112126 allocated_experiment = config_evaluation .allocated_experiment
113127
114- event . metadata = {
128+ metadata = {
115129 "config" : layer .name ,
116130 "ruleID" : layer .rule_id ,
117131 "allocatedExperiment" : allocated_experiment ,
118132 "parameterName" : parameter_name ,
119133 "isExplicitParameter" : "true" if is_explicit else "false"
120134 }
135+ if not self ._is_unique_exposure (user , _LAYER_EXPOSURE_EVENT , metadata ):
136+ return
137+ event .metadata = metadata
121138 if is_manual_exposure :
122139 event .metadata ["isManualExposure" ] = "true"
123140
124141 event ._secondary_exposures = [] if exposures is None else exposures
125142
126143 _safe_add_evaluation_to_event (
127144 config_evaluation .evaluation_details , event )
145+
128146 self .log (event )
129147
130148 def flush (self ):
@@ -157,11 +175,19 @@ def _periodic_flush(self, shutdown_event):
157175 except Exception as e :
158176 self ._error_boundary .log_exception (e )
159177
178+ def _periodic_dedupe_clear (self , shutdown_event ):
179+ while True :
180+ try :
181+ if shutdown_event .wait (self ._logging_interval ):
182+ break
183+ self ._deduper = set ()
184+ except Exception as e :
185+ self ._error_boundary .log_exception (e )
186+
160187 def _periodic_retry (self , shutdown_event ):
161188 while True :
162189 if shutdown_event .wait (self ._retry_interval ):
163190 break
164-
165191 length = len (self ._retry_logs )
166192 for _i in range (length ):
167193 try :
@@ -178,3 +204,24 @@ def _periodic_retry(self, shutdown_event):
178204 return
179205
180206 self ._retry_logs .append (RetryableLogs (retry_logs .payload , retry_logs .retries ))
207+
208+ def _is_unique_exposure (self , user , eventName : str , metadata : dict or None ) -> bool :
209+ if user is None :
210+ return True
211+ if len (self ._deduper ) > 10000 :
212+ self ._deduper = set ()
213+ custom_id_key = ''
214+ if user .custom_ids and isinstance (user .custom_ids , dict ):
215+ custom_id_key = ',' .join (user .custom_ids .values ())
216+
217+ metadata_key = ''
218+ if metadata and isinstance (metadata , dict ):
219+ metadata_key = ',' .join (str (value ) for key , value in metadata .items () if key not in _IGNORED_METADATA_KEYS )
220+
221+ key = ',' .join (str (item ) for item in [user .user_id , custom_id_key , eventName , metadata_key ])
222+
223+ if key in self ._deduper :
224+ return False
225+
226+ self ._deduper .add (key )
227+ return True
0 commit comments