@@ -585,10 +585,12 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
585585nodes , err := c .CoreV1 ().Nodes ().List (ctx , metav1.ListOptions {})
586586framework .ExpectNoError (err )
587587nodeCount := len (nodes .Items )
588- retryTimeout := dsRetryTimeout + time .Duration (nodeCount * 30 )* time .Second
588+ // We disturb daemonset progress by randomly terminating pods.
589+ randomPodTerminationTimeout := 5 * time .Minute
590+ retryTimeout := dsRetryTimeout + randomPodTerminationTimeout + time .Duration (nodeCount * 30 )* time .Second
589591
590592ginkgo .By ("Check that daemon pods surge and invariants are preserved during that rollout" )
591- ageOfOldPod := make (map [string ]time.Time )
593+ nodeToAgeOfOldPod := make (map [ string ] map [string ]time.Time )
592594deliberatelyDeletedPods := sets .NewString ()
593595err = wait .PollUntilContextTimeout (ctx , dsRetryPeriod , retryTimeout , true , func (ctx context.Context ) (bool , error ) {
594596podList , err := c .CoreV1 ().Pods (ds .Namespace ).List (ctx , metav1.ListOptions {})
@@ -682,17 +684,25 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
682684// if this is a pod in an older version AND there is a new version of this pod, record when
683685// we started seeing this, otherwise delete the record (perhaps the node was drained)
684686if nodesToVersions [pod .Spec .NodeName ][newVersion ] > 0 {
685- if _ , ok := ageOfOldPod [string (pod .UID )]; ! ok {
686- ageOfOldPod [string (pod .UID )] = now
687+ if _ , ok := nodeToAgeOfOldPod [pod .Spec .NodeName ][string (pod .UID )]; ! ok {
688+ if _ , ok := nodeToAgeOfOldPod [pod .Spec .NodeName ]; ! ok {
689+ nodeToAgeOfOldPod [pod .Spec .NodeName ] = make (map [string ]time.Time )
690+ }
691+ nodeToAgeOfOldPod [pod .Spec .NodeName ][string (pod .UID )] = now
687692}
688693} else {
689- delete (ageOfOldPod , string ( pod .UID ) )
694+ delete (nodeToAgeOfOldPod , pod .Spec . NodeName )
690695}
691696}
692697// purge the old pods list of any deleted pods
693- for uid := range ageOfOldPod {
694- if ! podUIDs .Has (uid ) {
695- delete (ageOfOldPod , uid )
698+ for node , uidToTime := range nodeToAgeOfOldPod {
699+ for uid := range uidToTime {
700+ if ! podUIDs .Has (uid ) {
701+ delete (uidToTime , uid )
702+ }
703+ }
704+ if len (uidToTime ) == 0 {
705+ delete (nodeToAgeOfOldPod , node )
696706}
697707}
698708deliberatelyDeletedPods = deliberatelyDeletedPods .Intersection (deletedPodUIDs )
@@ -713,9 +723,11 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
713723}
714724
715725// invariant: the controller must react to the new pod becoming ready within a reasonable timeframe (2x grace period)
716- for uid , firstSeen := range ageOfOldPod {
717- if now .Sub (firstSeen ) > maxSurgeOverlap {
718- errs = append (errs , fmt .Sprintf ("An old pod with UID %s has been running alongside a newer version for longer than %s" , uid , maxSurgeOverlap ))
726+ for node , uidToTime := range nodeToAgeOfOldPod {
727+ for uid , firstSeenSinceNewVersionPod := range uidToTime {
728+ if now .Sub (firstSeenSinceNewVersionPod ) > maxSurgeOverlap {
729+ errs = append (errs , fmt .Sprintf ("An old pod with UID %s on a node %s has been running alongside a newer version for longer than %s" , uid , node , maxSurgeOverlap ))
730+ }
719731}
720732}
721733
@@ -800,6 +812,9 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
800812} else {
801813framework .Logf ("Deleted pod %s prematurely" , pod .Name )
802814deliberatelyDeletedPods .Insert (string (pod .UID ))
815+ // If the deleted pod was an old version, we do not need to measure the controller's reaction because we removed the pod ourselves.
816+ // If it was a new version, we have to reset the recorded time so that counting restarts while the replacement pod reaches readiness.
817+ delete (nodeToAgeOfOldPod , pod .Spec .NodeName )
803818}
804819}
805820}
0 commit comments