pandas-dev
diff --git a/‎doc/source/whatsnew/v1.5.0.rst‎
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/v1.5.0.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/_libs/window/aggregations.pyx‎
Lines changed: 86 additions & 35 deletions b/‎pandas/_libs/window/aggregations.pyx‎
Lines changed: 86 additions & 35 deletions
diff --git a/‎pandas/tests/window/test_rolling.py‎
Lines changed: 11 additions & 0 deletions b/‎pandas/tests/window/test_rolling.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎pandas/tests/window/test_rolling_skew_kurt.py‎
Lines changed: 15 additions & 9 deletions b/‎pandas/tests/window/test_rolling_skew_kurt.py‎
Lines changed: 15 additions & 9 deletions
@@ -732,6 +732,7 @@ Groupby/resample/rolling
 - Bug in :meth:`SeriesGroupBy.apply` would incorrectly name its result when there was a unique group (:issue:`46369`)
 - Bug in :meth:`Rolling.sum` and :meth:`Rolling.mean` would give incorrect result with window of same values (:issue:`42064`, :issue:`46431`)
 - Bug in :meth:`Rolling.var` and :meth:`Rolling.std` would give non-zero result with window of same values (:issue:`42064`)
+- Bug in :meth:`Rolling.skew` and :meth:`Rolling.kurt` would give NaN with window of same values (:issue:`30993`)
 - Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
 - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
 - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`)
 
@@ -455,8 +455,9 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
 
 
 cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
- float64_t x, float64_t xx,
- float64_t xxx) nogil:
+ float64_t x, float64_t xx, float64_t xxx,
+ int64_t num_consecutive_same_value
+ ) nogil:
  cdef:
  float64_t result, dnobs
  float64_t A, B, C, R
@@ -467,6 +468,12 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
  B = xx / dnobs - A * A
  C = xxx / dnobs - A * A * A - 3 * A * B
 
+ if nobs < 3:
+ result = NaN
+ # GH 42064 46431
+ # uniform case, force result to be 0
+ elif num_consecutive_same_value >= nobs:
+ result = 0.0
  # #18044: with uniform distribution, floating issue will
  # cause B != 0. and cause the result is a very
  # large number.
@@ -476,7 +483,7 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
  # if the variance is less than 1e-14, it could be
  # treat as zero, here we follow the original
  # skew/kurt behaviour to check B <= 1e-14
- if B <= 1e-14 or nobs < 3:
+ elif B <= 1e-14:
  result = NaN
  else:
  R = sqrt(B)
@@ -493,7 +500,10 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
  float64_t *xxx,
  float64_t *compensation_x,
  float64_t *compensation_xx,
- float64_t *compensation_xxx) nogil:
+ float64_t *compensation_xxx,
+ int64_t *num_consecutive_same_value,
+ float64_t *prev_value,
+ ) nogil:
  """ add a value from the skew calc """
  cdef:
  float64_t y, t
@@ -515,6 +525,14 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
  compensation_xxx[0] = t - xxx[0] - y
  xxx[0] = t
 
+ # GH#42064, record num of same values to remove floating point artifacts
+ if val == prev_value[0]:
+ num_consecutive_same_value[0] += 1
+ else:
+ # reset to 1 (include current value itself)
+ num_consecutive_same_value[0] = 1
+ prev_value[0] = val
+
 
 cdef inline void remove_skew(float64_t val, int64_t *nobs,
  float64_t *x, float64_t *xx,
@@ -553,8 +571,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
  float64_t compensation_xx_add, compensation_xx_remove
  float64_t compensation_x_add, compensation_x_remove
  float64_t x, xx, xxx
+ float64_t prev_value
  int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0
- int64_t s, e
+ int64_t s, e, num_consecutive_same_value
  ndarray[float64_t] output, mean_array, values_copy
  bint is_monotonic_increasing_bounds
 
@@ -588,6 +607,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
  # never removed
  if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
 
+ prev_value = values[s]
+ num_consecutive_same_value = 0
+
  compensation_xxx_add = compensation_xxx_remove = 0
  compensation_xx_add = compensation_xx_remove = 0
  compensation_x_add = compensation_x_remove = 0
@@ -596,7 +618,8 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
  for j in range(s, e):
  val = values_copy[j]
  add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add,
- &compensation_xx_add, &compensation_xxx_add)
+ &compensation_xx_add, &compensation_xxx_add,
+ &num_consecutive_same_value, &prev_value)
 
  else:
 
@@ -612,9 +635,10 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
  for j in range(end[i - 1], e):
  val = values_copy[j]
  add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add,
- &compensation_xx_add, &compensation_xxx_add)
+ &compensation_xx_add, &compensation_xxx_add,
+ &num_consecutive_same_value, &prev_value)
 
- output[i] = calc_skew(minp, nobs, x, xx, xxx)
+ output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value)
 
  if not is_monotonic_increasing_bounds:
  nobs = 0
@@ -630,35 +654,44 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
 
 cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs,
  float64_t x, float64_t xx,
- float64_t xxx, float64_t xxxx) nogil:
+ float64_t xxx, float64_t xxxx,
+ int64_t num_consecutive_same_value,
+ ) nogil:
  cdef:
  float64_t result, dnobs
  float64_t A, B, C, D, R, K
 
  if nobs >= minp:
- dnobs = <float64_t>nobs
- A = x / dnobs
- R = A * A
- B = xx / dnobs - R
- R = R * A
- C = xxx / dnobs - R - 3 * A * B
- R = R * A
- D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
-
- # #18044: with uniform distribution, floating issue will
- # cause B != 0. and cause the result is a very
- # large number.
- #
- # in core/nanops.py nanskew/nankurt call the function
- # _zero_out_fperr(m2) to fix floating error.
- # if the variance is less than 1e-14, it could be
- # treat as zero, here we follow the original
- # skew/kurt behaviour to check B <= 1e-14
- if B <= 1e-14 or nobs < 4:
+ if nobs < 4:
  result = NaN
+ # GH 42064 46431
+ # uniform case, force result to be -3.
+ elif num_consecutive_same_value >= nobs:
+ result = -3.
  else:
- K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
- result = K / ((dnobs - 2.) * (dnobs - 3.))
+ dnobs = <float64_t>nobs
+ A = x / dnobs
+ R = A * A
+ B = xx / dnobs - R
+ R = R * A
+ C = xxx / dnobs - R - 3 * A * B
+ R = R * A
+ D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
+
+ # #18044: with uniform distribution, floating issue will
+ # cause B != 0. and cause the result is a very
+ # large number.
+ #
+ # in core/nanops.py nanskew/nankurt call the function
+ # _zero_out_fperr(m2) to fix floating error.
+ # if the variance is less than 1e-14, it could be
+ # treat as zero, here we follow the original
+ # skew/kurt behaviour to check B <= 1e-14
+ if B <= 1e-14:
+ result = NaN
+ else:
+ K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
+ result = K / ((dnobs - 2.) * (dnobs - 3.))
  else:
  result = NaN
 
@@ -671,7 +704,10 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
  float64_t *compensation_x,
  float64_t *compensation_xx,
  float64_t *compensation_xxx,
- float64_t *compensation_xxxx) nogil:
+ float64_t *compensation_xxxx,
+ int64_t *num_consecutive_same_value,
+ float64_t *prev_value
+ ) nogil:
  """ add a value from the kurotic calc """
  cdef:
  float64_t y, t
@@ -697,6 +733,14 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
  compensation_xxxx[0] = t - xxxx[0] - y
  xxxx[0] = t
 
+ # GH#42064, record num of same values to remove floating point artifacts
+ if val == prev_value[0]:
+ num_consecutive_same_value[0] += 1
+ else:
+ # reset to 1 (include current value itself)
+ num_consecutive_same_value[0] = 1
+ prev_value[0] = val
+
 
 cdef inline void remove_kurt(float64_t val, int64_t *nobs,
  float64_t *x, float64_t *xx,
@@ -741,7 +785,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
  float64_t compensation_xx_remove, compensation_xx_add
  float64_t compensation_x_remove, compensation_x_add
  float64_t x, xx, xxx, xxxx
- int64_t nobs, s, e, N = len(start), V = len(values), nobs_mean = 0
+ float64_t prev_value
+ int64_t nobs, s, e, num_consecutive_same_value
+ int64_t N = len(start), V = len(values), nobs_mean = 0
  ndarray[float64_t] output, values_copy
  bint is_monotonic_increasing_bounds
 
@@ -775,6 +821,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
  # never removed
  if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
 
+ prev_value = values[s]
+ num_consecutive_same_value = 0
+
  compensation_xxxx_add = compensation_xxxx_remove = 0
  compensation_xxx_remove = compensation_xxx_add = 0
  compensation_xx_remove = compensation_xx_add = 0
@@ -784,7 +833,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
  for j in range(s, e):
  add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
  &compensation_x_add, &compensation_xx_add,
- &compensation_xxx_add, &compensation_xxxx_add)
+ &compensation_xxx_add, &compensation_xxxx_add,
+ &num_consecutive_same_value, &prev_value)
 
  else:
 
@@ -800,9 +850,10 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
  for j in range(end[i - 1], e):
  add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
  &compensation_x_add, &compensation_xx_add,
- &compensation_xxx_add, &compensation_xxxx_add)
+ &compensation_xxx_add, &compensation_xxxx_add,
+ &num_consecutive_same_value, &prev_value)
 
- output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
+ output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value)
 
  if not is_monotonic_increasing_bounds:
  nobs = 0
 
@@ -1860,3 +1860,14 @@ def test_rolling_mean_sum_floating_artifacts():
  assert (result[-3:] == 0).all()
  result = r.sum()
  assert (result[-3:] == 0).all()
+
+
+def test_rolling_skew_kurt_floating_artifacts():
+ # GH 42064 46431
+
+ sr = Series([1 / 3, 4, 0, 0, 0, 0, 0])
+ r = sr.rolling(4)
+ result = r.skew()
+ assert (result[-2:] == 0).all()
+ result = r.kurt()
+ assert (result[-2:] == -3).all()
@@ -178,17 +178,18 @@ def test_center_reindex_frame(frame, roll_func):
 
 def test_rolling_skew_edge_cases(step):
 
- all_nan = Series([np.NaN] * 5)[::step]
-
+ expected = Series([np.NaN] * 4 + [0.0])[::step]
  # yields all NaN (0 variance)
  d = Series([1] * 5)
  x = d.rolling(window=5, step=step).skew()
- tm.assert_series_equal(all_nan, x)
+ # index 4 should be 0 as it contains 5 same obs
+ tm.assert_series_equal(expected, x)
 
+ expected = Series([np.NaN] * 5)[::step]
  # yields all NaN (window too small)
  d = Series(np.random.randn(5))
  x = d.rolling(window=2, step=step).skew()
- tm.assert_series_equal(all_nan, x)
+ tm.assert_series_equal(expected, x)
 
  # yields [NaN, NaN, NaN, 0.177994, 1.548824]
  d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
@@ -199,17 +200,18 @@ def test_rolling_skew_edge_cases(step):
 
 def test_rolling_kurt_edge_cases(step):
 
- all_nan = Series([np.NaN] * 5)[::step]
+ expected = Series([np.NaN] * 4 + [-3.0])[::step]
 
  # yields all NaN (0 variance)
  d = Series([1] * 5)
  x = d.rolling(window=5, step=step).kurt()
- tm.assert_series_equal(all_nan, x)
+ tm.assert_series_equal(expected, x)
 
  # yields all NaN (window too small)
+ expected = Series([np.NaN] * 5)[::step]
  d = Series(np.random.randn(5))
  x = d.rolling(window=3, step=step).kurt()
- tm.assert_series_equal(all_nan, x)
+ tm.assert_series_equal(expected, x)
 
  # yields [NaN, NaN, NaN, 1.224307, 2.671499]
  d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
@@ -220,11 +222,15 @@ def test_rolling_kurt_edge_cases(step):
 
 def test_rolling_skew_eq_value_fperr(step):
  # #18804 all rolling skew for all equal values should return Nan
+ # #46717 update: all equal values should return 0 instead of NaN
  a = Series([1.1] * 15).rolling(window=10, step=step).skew()
- assert np.isnan(a).all()
+ assert (a[a.index >= 9] == 0).all()
+ assert a[a.index < 9].isna().all()
 
 
 def test_rolling_kurt_eq_value_fperr(step):
  # #18804 all rolling kurt for all equal values should return Nan
+ # #46717 update: all equal values should return -3 instead of NaN
  a = Series([1.1] * 15).rolling(window=10, step=step).kurt()
- assert np.isnan(a).all()
+ assert (a[a.index >= 9] == -3).all()
+ assert a[a.index < 9].isna().all()