@@ -455,8 +455,9 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
455455
456456
457457cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
458- float64_t x, float64_t xx,
459- float64_t xxx) nogil:
458+ float64_t x, float64_t xx, float64_t xxx,
459+ int64_t num_consecutive_same_value
460+ ) nogil:
460461 cdef:
461462 float64_t result, dnobs
462463 float64_t A, B, C, R
@@ -467,6 +468,12 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
467468 B = xx / dnobs - A * A
468469 C = xxx / dnobs - A * A * A - 3 * A * B
469470
471+ if nobs < 3 :
472+ result = NaN
473+ # GH 42064 46431
474+ # uniform case, force result to be 0
475+ elif num_consecutive_same_value >= nobs:
476+ result = 0.0
470477 # #18044: with uniform distribution, floating issue will
471478 # cause B != 0. and cause the result is a very
472479 # large number.
@@ -476,7 +483,7 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
476483 # if the variance is less than 1e-14, it could be
477484 # treat as zero, here we follow the original
478485 # skew/kurt behaviour to check B <= 1e-14
479- if B <= 1e-14 or nobs < 3 :
486+ elif B <= 1e-14 :
480487 result = NaN
481488 else :
482489 R = sqrt(B)
@@ -493,7 +500,10 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
493500 float64_t * xxx,
494501 float64_t * compensation_x,
495502 float64_t * compensation_xx,
496- float64_t * compensation_xxx) nogil:
503+ float64_t * compensation_xxx,
504+ int64_t * num_consecutive_same_value,
505+ float64_t * prev_value,
506+ ) nogil:
497507 """ add a value from the skew calc """
498508 cdef:
499509 float64_t y, t
@@ -515,6 +525,14 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
515525 compensation_xxx[0 ] = t - xxx[0 ] - y
516526 xxx[0 ] = t
517527
528+ # GH#42064, record num of same values to remove floating point artifacts
529+ if val == prev_value[0 ]:
530+ num_consecutive_same_value[0 ] += 1
531+ else :
532+ # reset to 1 (include current value itself)
533+ num_consecutive_same_value[0 ] = 1
534+ prev_value[0 ] = val
535+
518536
519537cdef inline void remove_skew(float64_t val, int64_t * nobs,
520538 float64_t * x, float64_t * xx,
@@ -553,8 +571,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
553571 float64_t compensation_xx_add , compensation_xx_remove
554572 float64_t compensation_x_add , compensation_x_remove
555573 float64_t x , xx , xxx
574+ float64_t prev_value
556575 int64_t nobs = 0 , N = len (start), V = len (values), nobs_mean = 0
557- int64_t s , e
576+ int64_t s , e , num_consecutive_same_value
558577 ndarray[float64_t] output , mean_array , values_copy
559578 bint is_monotonic_increasing_bounds
560579
@@ -588,6 +607,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
588607 # never removed
589608 if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
590609
610+ prev_value = values[s]
611+ num_consecutive_same_value = 0
612+
591613 compensation_xxx_add = compensation_xxx_remove = 0
592614 compensation_xx_add = compensation_xx_remove = 0
593615 compensation_x_add = compensation_x_remove = 0
@@ -596,7 +618,8 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
596618 for j in range (s, e):
597619 val = values_copy[j]
598620 add_skew(val, & nobs, & x, & xx, & xxx, & compensation_x_add,
599- & compensation_xx_add, & compensation_xxx_add)
621+ & compensation_xx_add, & compensation_xxx_add,
622+ & num_consecutive_same_value, & prev_value)
600623
601624 else :
602625
@@ -612,9 +635,10 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
612635 for j in range (end[i - 1 ], e):
613636 val = values_copy[j]
614637 add_skew(val, & nobs, & x, & xx, & xxx, & compensation_x_add,
615- & compensation_xx_add, & compensation_xxx_add)
638+ & compensation_xx_add, & compensation_xxx_add,
639+ & num_consecutive_same_value, & prev_value)
616640
617- output[i] = calc_skew(minp, nobs, x, xx, xxx)
641+ output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value )
618642
619643 if not is_monotonic_increasing_bounds:
620644 nobs = 0
@@ -630,35 +654,44 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
630654
631655cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs,
632656 float64_t x, float64_t xx,
633- float64_t xxx, float64_t xxxx) nogil:
657+ float64_t xxx, float64_t xxxx,
658+ int64_t num_consecutive_same_value,
659+ ) nogil:
634660 cdef:
635661 float64_t result, dnobs
636662 float64_t A, B, C, D, R, K
637663
638664 if nobs >= minp:
639- dnobs = < float64_t> nobs
640- A = x / dnobs
641- R = A * A
642- B = xx / dnobs - R
643- R = R * A
644- C = xxx / dnobs - R - 3 * A * B
645- R = R * A
646- D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
647-
648- # #18044: with uniform distribution, floating issue will
649- # cause B != 0. and cause the result is a very
650- # large number.
651- #
652- # in core/nanops.py nanskew/nankurt call the function
653- # _zero_out_fperr(m2) to fix floating error.
654- # if the variance is less than 1e-14, it could be
655- # treat as zero, here we follow the original
656- # skew/kurt behaviour to check B <= 1e-14
657- if B <= 1e-14 or nobs < 4 :
665+ if nobs < 4 :
658666 result = NaN
667+ # GH 42064 46431
668+ # uniform case, force result to be -3.
669+ elif num_consecutive_same_value >= nobs:
670+ result = - 3.
659671 else :
660- K = (dnobs * dnobs - 1. ) * D / (B * B) - 3 * ((dnobs - 1. ) ** 2 )
661- result = K / ((dnobs - 2. ) * (dnobs - 3. ))
672+ dnobs = < float64_t> nobs
673+ A = x / dnobs
674+ R = A * A
675+ B = xx / dnobs - R
676+ R = R * A
677+ C = xxx / dnobs - R - 3 * A * B
678+ R = R * A
679+ D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
680+
681+ # #18044: with uniform distribution, floating issue will
682+ # cause B != 0. and cause the result is a very
683+ # large number.
684+ #
685+ # in core/nanops.py nanskew/nankurt call the function
686+ # _zero_out_fperr(m2) to fix floating error.
687+ # if the variance is less than 1e-14, it could be
688+ # treat as zero, here we follow the original
689+ # skew/kurt behaviour to check B <= 1e-14
690+ if B <= 1e-14 :
691+ result = NaN
692+ else :
693+ K = (dnobs * dnobs - 1. ) * D / (B * B) - 3 * ((dnobs - 1. ) ** 2 )
694+ result = K / ((dnobs - 2. ) * (dnobs - 3. ))
662695 else :
663696 result = NaN
664697
@@ -671,7 +704,10 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
671704 float64_t * compensation_x,
672705 float64_t * compensation_xx,
673706 float64_t * compensation_xxx,
674- float64_t * compensation_xxxx) nogil:
707+ float64_t * compensation_xxxx,
708+ int64_t * num_consecutive_same_value,
709+ float64_t * prev_value
710+ ) nogil:
675711 """ add a value from the kurotic calc """
676712 cdef:
677713 float64_t y, t
@@ -697,6 +733,14 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
697733 compensation_xxxx[0 ] = t - xxxx[0 ] - y
698734 xxxx[0 ] = t
699735
736+ # GH#42064, record num of same values to remove floating point artifacts
737+ if val == prev_value[0 ]:
738+ num_consecutive_same_value[0 ] += 1
739+ else :
740+ # reset to 1 (include current value itself)
741+ num_consecutive_same_value[0 ] = 1
742+ prev_value[0 ] = val
743+
700744
701745cdef inline void remove_kurt(float64_t val, int64_t * nobs,
702746 float64_t * x, float64_t * xx,
@@ -741,7 +785,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
741785 float64_t compensation_xx_remove , compensation_xx_add
742786 float64_t compensation_x_remove , compensation_x_add
743787 float64_t x , xx , xxx , xxxx
744- int64_t nobs , s , e , N = len (start), V = len (values), nobs_mean = 0
788+ float64_t prev_value
789+ int64_t nobs , s , e , num_consecutive_same_value
790+ int64_t N = len (start), V = len (values), nobs_mean = 0
745791 ndarray[float64_t] output , values_copy
746792 bint is_monotonic_increasing_bounds
747793
@@ -775,6 +821,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
775821 # never removed
776822 if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
777823
824+ prev_value = values[s]
825+ num_consecutive_same_value = 0
826+
778827 compensation_xxxx_add = compensation_xxxx_remove = 0
779828 compensation_xxx_remove = compensation_xxx_add = 0
780829 compensation_xx_remove = compensation_xx_add = 0
@@ -784,7 +833,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
784833 for j in range (s, e):
785834 add_kurt(values_copy[j], & nobs, & x, & xx, & xxx, & xxxx,
786835 & compensation_x_add, & compensation_xx_add,
787- & compensation_xxx_add, & compensation_xxxx_add)
836+ & compensation_xxx_add, & compensation_xxxx_add,
837+ & num_consecutive_same_value, & prev_value)
788838
789839 else :
790840
@@ -800,9 +850,10 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
800850 for j in range (end[i - 1 ], e):
801851 add_kurt(values_copy[j], & nobs, & x, & xx, & xxx, & xxxx,
802852 & compensation_x_add, & compensation_xx_add,
803- & compensation_xxx_add, & compensation_xxxx_add)
853+ & compensation_xxx_add, & compensation_xxxx_add,
854+ & num_consecutive_same_value, & prev_value)
804855
805- output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
856+ output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value )
806857
807858 if not is_monotonic_increasing_bounds:
808859 nobs = 0
0 commit comments