@@ -69,22 +69,28 @@ cdef bint is_monotonic_increasing_start_end_bounds(
6969# Rolling sum
7070
7171
cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x,
                               int64_t num_consecutive_same_value,
                               float64_t prev_value) nogil:
    """
    Pick the value reported for the current window of the rolling sum.

    Returns 0 when both ``nobs`` and ``minp`` are 0, and NaN when ``nobs``
    is below ``minp``. Otherwise returns ``prev_value * nobs`` if
    ``num_consecutive_same_value`` is at least ``nobs``, else ``sum_x``.
    """
    # nothing observed and nothing required
    if nobs == 0 and minp == 0:
        return 0

    # fewer observations than the required minimum
    if nobs < minp:
        return NaN

    # GH#42064: the run of equal values recorded by add_sum is at least as
    # long as the observation count, so multiply rather than use the
    # accumulated total (removes floating point artifacts).
    if num_consecutive_same_value >= nobs:
        return prev_value * nobs

    return sum_x
8489
8590
8691cdef inline void add_sum(float64_t val, int64_t * nobs, float64_t * sum_x,
87- float64_t * compensation) nogil:
92+ float64_t * compensation, int64_t * num_consecutive_same_value,
93+ float64_t * prev_value) nogil:
8894 """ add a value from the sum calc using Kahan summation """
8995
9096 cdef:
@@ -98,6 +104,14 @@ cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
98104 compensation[0 ] = t - sum_x[0 ] - y
99105 sum_x[0 ] = t
100106
107+ # GH#42064, record num of same values to remove floating point artifacts
108+ if val == prev_value[0 ]:
109+ num_consecutive_same_value[0 ] += 1
110+ else :
111+ # reset to 1 (include current value itself)
112+ num_consecutive_same_value[0 ] = 1
113+ prev_value[0 ] = val
114+
101115
102116cdef inline void remove_sum(float64_t val, int64_t * nobs, float64_t * sum_x,
103117 float64_t * compensation) nogil:
@@ -119,8 +133,8 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
119133 ndarray[int64_t] end , int64_t minp ) -> np.ndarray:
120134 cdef:
121135 Py_ssize_t i , j
122- float64_t sum_x , compensation_add , compensation_remove
123- int64_t s , e
136+ float64_t sum_x , compensation_add , compensation_remove , prev_value
137+ int64_t s , e , num_consecutive_same_value
124138 int64_t nobs = 0 , N = len (start)
125139 ndarray[float64_t] output
126140 bint is_monotonic_increasing_bounds
@@ -139,11 +153,13 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
139153 if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
140154
141155 # setup
142-
156+ prev_value = values[s]
157+ num_consecutive_same_value = 0
143158 sum_x = compensation_add = compensation_remove = 0
144159 nobs = 0
145160 for j in range (s, e):
146- add_sum(values[j], & nobs, & sum_x, & compensation_add)
161+ add_sum(values[j], & nobs, & sum_x, & compensation_add,
162+ & num_consecutive_same_value, & prev_value)
147163
148164 else :
149165
@@ -153,9 +169,10 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
153169
154170 # calculate adds
155171 for j in range (end[i - 1 ], e):
156- add_sum(values[j], & nobs, & sum_x, & compensation_add)
172+ add_sum(values[j], & nobs, & sum_x, & compensation_add,
173+ & num_consecutive_same_value, & prev_value)
157174
158- output[i] = calc_sum(minp, nobs, sum_x)
175+ output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value )
159176
160177 if not is_monotonic_increasing_bounds:
161178 nobs = 0
@@ -169,14 +186,17 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
169186# Rolling mean
170187
171188
172- cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
173- Py_ssize_t neg_ct, float64_t sum_x) nogil:
189+ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct,
190+ float64_t sum_x, int64_t num_consecutive_same_value,
191+ float64_t prev_value) nogil:
174192 cdef:
175193 float64_t result
176194
177195 if nobs >= minp and nobs > 0 :
178196 result = sum_x / < float64_t> nobs
179- if neg_ct == 0 and result < 0 :
197+ if num_consecutive_same_value >= nobs:
198+ result = prev_value
199+ elif neg_ct == 0 and result < 0 :
180200 # all positive
181201 result = 0
182202 elif neg_ct == nobs and result > 0 :
@@ -190,7 +210,8 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
190210
191211
192212cdef inline void add_mean(float64_t val, Py_ssize_t * nobs, float64_t * sum_x,
193- Py_ssize_t * neg_ct, float64_t * compensation) nogil:
213+ Py_ssize_t * neg_ct, float64_t * compensation,
214+ int64_t * num_consecutive_same_value, float64_t * prev_value) nogil:
194215 """ add a value from the mean calc using Kahan summation """
195216 cdef:
196217 float64_t y, t
@@ -205,6 +226,14 @@ cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
205226 if signbit(val):
206227 neg_ct[0 ] = neg_ct[0 ] + 1
207228
229+ # GH#42064, record num of same values to remove floating point artifacts
230+ if val == prev_value[0 ]:
231+ num_consecutive_same_value[0 ] += 1
232+ else :
233+ # reset to 1 (include current value itself)
234+ num_consecutive_same_value[0 ] = 1
235+ prev_value[0 ] = val
236+
208237
209238cdef inline void remove_mean(float64_t val, Py_ssize_t * nobs, float64_t * sum_x,
210239 Py_ssize_t * neg_ct, float64_t * compensation) nogil:
@@ -225,8 +254,8 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
225254def roll_mean (const float64_t[:] values , ndarray[int64_t] start ,
226255 ndarray[int64_t] end , int64_t minp ) -> np.ndarray:
227256 cdef:
228- float64_t val , compensation_add , compensation_remove , sum_x
229- int64_t s , e
257+ float64_t val , compensation_add , compensation_remove , sum_x , prev_value
258+ int64_t s , e , num_consecutive_same_value
230259 Py_ssize_t nobs , i , j , neg_ct , N = len (start)
231260 ndarray[float64_t] output
232261 bint is_monotonic_increasing_bounds
@@ -244,12 +273,15 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
244273
245274 if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
246275
276+ # setup
247277 compensation_add = compensation_remove = sum_x = 0
248278 nobs = neg_ct = 0
249- # setup
279+ prev_value = values[s]
280+ num_consecutive_same_value = 0
250281 for j in range (s, e):
251282 val = values[j]
252- add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add)
283+ add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add,
284+ & num_consecutive_same_value, & prev_value)
253285
254286 else :
255287
@@ -261,9 +293,10 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
261293 # calculate adds
262294 for j in range (end[i - 1 ], e):
263295 val = values[j]
264- add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add)
296+ add_mean(val, & nobs, & sum_x, & neg_ct, & compensation_add,
297+ & num_consecutive_same_value, & prev_value)
265298
266- output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
299+ output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value )
267300
268301 if not is_monotonic_increasing_bounds:
269302 nobs = 0
0 commit comments