pandas-dev
diff --git a/‎doc/source/whatsnew/v1.5.0.rst‎
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/v1.5.0.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/_libs/window/aggregations.pyx‎
Lines changed: 52 additions & 19 deletions b/‎pandas/_libs/window/aggregations.pyx‎
Lines changed: 52 additions & 19 deletions
diff --git a/‎pandas/core/_numba/kernels/mean_.py‎
Lines changed: 52 additions & 8 deletions b/‎pandas/core/_numba/kernels/mean_.py‎
Lines changed: 52 additions & 8 deletions
diff --git a/‎pandas/core/_numba/kernels/sum_.py‎
Lines changed: 48 additions & 8 deletions b/‎pandas/core/_numba/kernels/sum_.py‎
Lines changed: 48 additions & 8 deletions
diff --git a/‎pandas/tests/window/test_rolling.py‎
Lines changed: 11 additions & 0 deletions b/‎pandas/tests/window/test_rolling.py‎
Lines changed: 11 additions & 0 deletions
@@ -630,6 +630,7 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`)
 - Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`)
 - Bug in :meth:`SeriesGroupBy.apply` would incorrectly name its result when there was a unique group (:issue:`46369`)
+- Bug in :meth:`Rolling.sum` and :meth:`Rolling.mean` would give incorrect result with window of same values (:issue:`42064`, :issue:`46431`)
 - Bug in :meth:`Rolling.var` and :meth:`Rolling.std` would give non-zero result with window of same values (:issue:`42064`)
 - Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
 - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
 
@@ -69,22 +69,28 @@ cdef bint is_monotonic_increasing_start_end_bounds(
 # Rolling sum
 
 
-cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x) nogil:
+cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x,
+ int64_t num_consecutive_same_value, float64_t prev_value
+ ) nogil:
  cdef:
  float64_t result
 
  if nobs == 0 == minp:
  result = 0
  elif nobs >= minp:
- result = sum_x
+ if num_consecutive_same_value >= nobs:
+ result = prev_value * nobs
+ else:
+ result = sum_x
  else:
  result = NaN
 
  return result
 
 
 cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
- float64_t *compensation) nogil:
+ float64_t *compensation, int64_t *num_consecutive_same_value,
+ float64_t *prev_value) nogil:
  """ add a value from the sum calc using Kahan summation """
 
  cdef:
@@ -98,6 +104,14 @@ cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
  compensation[0] = t - sum_x[0] - y
  sum_x[0] = t
 
+ # GH#42064, record num of same values to remove floating point artifacts
+ if val == prev_value[0]:
+ num_consecutive_same_value[0] += 1
+ else:
+ # reset to 1 (include current value itself)
+ num_consecutive_same_value[0] = 1
+ prev_value[0] = val
+
 
 cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
  float64_t *compensation) nogil:
@@ -119,8 +133,8 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
  ndarray[int64_t] end, int64_t minp) -> np.ndarray:
  cdef:
  Py_ssize_t i, j
- float64_t sum_x, compensation_add, compensation_remove
- int64_t s, e
+ float64_t sum_x, compensation_add, compensation_remove, prev_value
+ int64_t s, e, num_consecutive_same_value
  int64_t nobs = 0, N = len(start)
  ndarray[float64_t] output
  bint is_monotonic_increasing_bounds
@@ -139,11 +153,13 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
  if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
 
  # setup
-
+ prev_value = values[s]
+ num_consecutive_same_value = 0
  sum_x = compensation_add = compensation_remove = 0
  nobs = 0
  for j in range(s, e):
- add_sum(values[j], &nobs, &sum_x, &compensation_add)
+ add_sum(values[j], &nobs, &sum_x, &compensation_add,
+ &num_consecutive_same_value, &prev_value)
 
  else:
 
@@ -153,9 +169,10 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
 
  # calculate adds
  for j in range(end[i - 1], e):
- add_sum(values[j], &nobs, &sum_x, &compensation_add)
+ add_sum(values[j], &nobs, &sum_x, &compensation_add,
+ &num_consecutive_same_value, &prev_value)
 
- output[i] = calc_sum(minp, nobs, sum_x)
+ output[i] = calc_sum(minp, nobs, sum_x, num_consecutive_same_value, prev_value)
 
  if not is_monotonic_increasing_bounds:
  nobs = 0
@@ -169,14 +186,17 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
 # Rolling mean
 
 
-cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
- Py_ssize_t neg_ct, float64_t sum_x) nogil:
+cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct,
+ float64_t sum_x, int64_t num_consecutive_same_value,
+ float64_t prev_value) nogil:
  cdef:
  float64_t result
 
  if nobs >= minp and nobs > 0:
  result = sum_x / <float64_t>nobs
- if neg_ct == 0 and result < 0:
+ if num_consecutive_same_value >= nobs:
+ result = prev_value
+ elif neg_ct == 0 and result < 0:
  # all positive
  result = 0
  elif neg_ct == nobs and result > 0:
@@ -190,7 +210,8 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
 
 
 cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
- Py_ssize_t *neg_ct, float64_t *compensation) nogil:
+ Py_ssize_t *neg_ct, float64_t *compensation,
+ int64_t *num_consecutive_same_value, float64_t *prev_value) nogil:
  """ add a value from the mean calc using Kahan summation """
  cdef:
  float64_t y, t
@@ -205,6 +226,14 @@ cdef inline void add_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
  if signbit(val):
  neg_ct[0] = neg_ct[0] + 1
 
+ # GH#42064, record num of same values to remove floating point artifacts
+ if val == prev_value[0]:
+ num_consecutive_same_value[0] += 1
+ else:
+ # reset to 1 (include current value itself)
+ num_consecutive_same_value[0] = 1
+ prev_value[0] = val
+
 
 cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
  Py_ssize_t *neg_ct, float64_t *compensation) nogil:
@@ -225,8 +254,8 @@ cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
 def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
  ndarray[int64_t] end, int64_t minp) -> np.ndarray:
  cdef:
- float64_t val, compensation_add, compensation_remove, sum_x
- int64_t s, e
+ float64_t val, compensation_add, compensation_remove, sum_x, prev_value
+ int64_t s, e, num_consecutive_same_value
  Py_ssize_t nobs, i, j, neg_ct, N = len(start)
  ndarray[float64_t] output
  bint is_monotonic_increasing_bounds
@@ -244,12 +273,15 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
 
  if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
 
+ # setup
  compensation_add = compensation_remove = sum_x = 0
  nobs = neg_ct = 0
- # setup
+ prev_value = values[s]
+ num_consecutive_same_value = 0
  for j in range(s, e):
  val = values[j]
- add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add)
+ add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add,
+ &num_consecutive_same_value, &prev_value)
 
  else:
 
@@ -261,9 +293,10 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
  # calculate adds
  for j in range(end[i - 1], e):
  val = values[j]
- add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add)
+ add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add,
+ &num_consecutive_same_value, &prev_value)
 
- output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
+ output[i] = calc_mean(minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value)
 
  if not is_monotonic_increasing_bounds:
  nobs = 0
 
@@ -16,8 +16,14 @@
 
 @numba.jit(nopython=True, nogil=True, parallel=False)
 def add_mean(
- val: float, nobs: int, sum_x: float, neg_ct: int, compensation: float
-) -> tuple[int, float, int, float]:
+ val: float,
+ nobs: int,
+ sum_x: float,
+ neg_ct: int,
+ compensation: float,
+ num_consecutive_same_value: int,
+ prev_value: float,
+) -> tuple[int, float, int, float, int, float]:
  if not np.isnan(val):
  nobs += 1
  y = val - compensation
@@ -26,7 +32,14 @@ def add_mean(
  sum_x = t
  if val < 0:
  neg_ct += 1
- return nobs, sum_x, neg_ct, compensation
+
+ if val == prev_value:
+ num_consecutive_same_value += 1
+ else:
+ num_consecutive_same_value = 1
+ prev_value = val
+
+ return nobs, sum_x, neg_ct, compensation, num_consecutive_same_value, prev_value
 
 
 @numba.jit(nopython=True, nogil=True, parallel=False)
@@ -68,10 +81,26 @@ def sliding_mean(
  s = start[i]
  e = end[i]
  if i == 0 or not is_monotonic_increasing_bounds:
+ prev_value = values[s]
+ num_consecutive_same_value = 0
+
  for j in range(s, e):
  val = values[j]
- nobs, sum_x, neg_ct, compensation_add = add_mean(
- val, nobs, sum_x, neg_ct, compensation_add
+ (
+ nobs,
+ sum_x,
+ neg_ct,
+ compensation_add,
+ num_consecutive_same_value,
+ prev_value,
+ ) = add_mean(
+ val,
+ nobs,
+ sum_x,
+ neg_ct,
+ compensation_add,
+ num_consecutive_same_value,
+ prev_value,
  )
  else:
  for j in range(start[i - 1], s):
@@ -82,13 +111,28 @@ def sliding_mean(
 
  for j in range(end[i - 1], e):
  val = values[j]
- nobs, sum_x, neg_ct, compensation_add = add_mean(
- val, nobs, sum_x, neg_ct, compensation_add
+ (
+ nobs,
+ sum_x,
+ neg_ct,
+ compensation_add,
+ num_consecutive_same_value,
+ prev_value,
+ ) = add_mean(
+ val,
+ nobs,
+ sum_x,
+ neg_ct,
+ compensation_add,
+ num_consecutive_same_value,
+ prev_value,
  )
 
  if nobs >= min_periods and nobs > 0:
  result = sum_x / nobs
- if neg_ct == 0 and result < 0:
+ if num_consecutive_same_value >= nobs:
+ result = prev_value
+ elif neg_ct == 0 and result < 0:
  result = 0
  elif neg_ct == nobs and result > 0:
  result = 0
 
@@ -16,15 +16,27 @@
 
 @numba.jit(nopython=True, nogil=True, parallel=False)
 def add_sum(
- val: float, nobs: int, sum_x: float, compensation: float
-) -> tuple[int, float, float]:
+ val: float,
+ nobs: int,
+ sum_x: float,
+ compensation: float,
+ num_consecutive_same_value: int,
+ prev_value: float,
+) -> tuple[int, float, float, int, float]:
  if not np.isnan(val):
  nobs += 1
  y = val - compensation
  t = sum_x + y
  compensation = t - sum_x - y
  sum_x = t
- return nobs, sum_x, compensation
+
+ if val == prev_value:
+ num_consecutive_same_value += 1
+ else:
+ num_consecutive_same_value = 1
+ prev_value = val
+
+ return nobs, sum_x, compensation, num_consecutive_same_value, prev_value
 
 
 @numba.jit(nopython=True, nogil=True, parallel=False)
@@ -63,10 +75,24 @@ def sliding_sum(
  s = start[i]
  e = end[i]
  if i == 0 or not is_monotonic_increasing_bounds:
+ prev_value = values[s]
+ num_consecutive_same_value = 0
+
  for j in range(s, e):
  val = values[j]
- nobs, sum_x, compensation_add = add_sum(
- val, nobs, sum_x, compensation_add
+ (
+ nobs,
+ sum_x,
+ compensation_add,
+ num_consecutive_same_value,
+ prev_value,
+ ) = add_sum(
+ val,
+ nobs,
+ sum_x,
+ compensation_add,
+ num_consecutive_same_value,
+ prev_value,
  )
  else:
  for j in range(start[i - 1], s):
@@ -77,14 +103,28 @@ def sliding_sum(
 
  for j in range(end[i - 1], e):
  val = values[j]
- nobs, sum_x, compensation_add = add_sum(
- val, nobs, sum_x, compensation_add
+ (
+ nobs,
+ sum_x,
+ compensation_add,
+ num_consecutive_same_value,
+ prev_value,
+ ) = add_sum(
+ val,
+ nobs,
+ sum_x,
+ compensation_add,
+ num_consecutive_same_value,
+ prev_value,
  )
 
  if nobs == 0 == min_periods:
  result = 0.0
  elif nobs >= min_periods:
- result = sum_x
+ if num_consecutive_same_value >= nobs:
+ result = prev_value * nobs
+ else:
+ result = sum_x
  else:
  result = np.nan
 
 
@@ -1849,3 +1849,14 @@ def test_rolling_var_same_value_count_logic(values, window, min_periods, expecte
  result_std = sr.rolling(window, min_periods=min_periods).std()
  tm.assert_series_equal(result_std, np.sqrt(expected))
  tm.assert_series_equal(expected == 0, result_std == 0)
+
+
+def test_rolling_mean_sum_floating_artifacts():
+ # GH 42064.
+
+ sr = Series([1 / 3, 4, 0, 0, 0, 0, 0])
+ r = sr.rolling(3)
+ result = r.mean()
+ assert (result[-3:] == 0).all()
+ result = r.sum()
+ assert (result[-3:] == 0).all()