Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions changelog/fragments/1758287649-fix-liveness_units.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# REQUIRED
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: bug-fix

# REQUIRED for all kinds
# Change summary; an 80ish-character-long description of the change.
summary: Include components units status in HTTP liveness checks

# REQUIRED for breaking-change, deprecation, known-issue
# Long description; in case the summary is not enough to describe the change
# this field accommodates a description without length limits.
# description:

# REQUIRED for breaking-change, deprecation, known-issue
# impact:

# REQUIRED for breaking-change, deprecation, known-issue
# action:

# REQUIRED for all kinds
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# AUTOMATED
# OPTIONAL to manually add other PR URLs
# PR URL: A link to the PR that added the changeset.
# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
# pr: https://github.com/owner/repo/1234

# AUTOMATED
# OPTIONAL to manually add other issue URLs
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/elastic-agent/issues/8047
10 changes: 3 additions & 7 deletions internal/pkg/agent/application/monitoring/liveness.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"github.com/elastic/elastic-agent-client/v7/pkg/client"

"github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring/monitoringhelpers"
"github.com/elastic/elastic-agent/internal/pkg/otel/otelhelpers"
agentclient "github.com/elastic/elastic-agent/pkg/control/v2/client"
)
Expand Down Expand Up @@ -83,13 +84,8 @@ func livenessHandler(coord CoordinatorState) func(http.ResponseWriter, *http.Req
return nil
}

unhealthyComponent := false
for _, comp := range state.Components {
if (failConfig.Failed && comp.State.State == client.UnitStateFailed) || (failConfig.Degraded && comp.State.State == client.UnitStateDegraded) {
unhealthyComponent = true
}
}
if state.Collector != nil {
unhealthyComponent := (failConfig.Failed && monitoringhelpers.HaveState(state.Components, client.UnitStateFailed)) || (failConfig.Degraded && monitoringhelpers.HaveState(state.Components, client.UnitStateDegraded))
if !unhealthyComponent && state.Collector != nil {
if (failConfig.Failed && (otelhelpers.HasStatus(state.Collector, componentstatus.StatusFatalError) || otelhelpers.HasStatus(state.Collector, componentstatus.StatusPermanentError))) || (failConfig.Degraded && otelhelpers.HasStatus(state.Collector, componentstatus.StatusRecoverableError)) {
unhealthyComponent = true
}
Expand Down
96 changes: 96 additions & 0 deletions internal/pkg/agent/application/monitoring/liveness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,102 @@ func TestLivenessProcessHTTPHandler(t *testing.T) {
expectedCode: 500,
failon: "degraded",
},
{
name: "component healthy and unit degraded",
coord: mockCoordinator{
isUp: true,
state: coordinator.State{
Components: []runtime.ComponentComponentState{
{
LegacyPID: "2",
State: runtime.ComponentState{
State: client.UnitStateHealthy,
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
{
UnitType: client.UnitTypeInput,
UnitID: "some-input-unit",
}: {
State: client.UnitStateDegraded,
},
},
},
Component: component.Component{
ID: "test-component",
InputSpec: &component.InputRuntimeSpec{
BinaryName: "testbeat",
},
},
},
},
},
},
expectedCode: 500,
failon: "degraded",
},
{
name: "component healthy and unit failed",
coord: mockCoordinator{
isUp: true,
state: coordinator.State{
Components: []runtime.ComponentComponentState{
{
LegacyPID: "2",
State: runtime.ComponentState{
State: client.UnitStateHealthy,
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
{
UnitType: client.UnitTypeInput,
UnitID: "some-input-unit",
}: {
State: client.UnitStateFailed,
},
},
},
Component: component.Component{
ID: "test-component",
InputSpec: &component.InputRuntimeSpec{
BinaryName: "testbeat",
},
},
},
},
},
},
expectedCode: 500,
failon: "failed",
},
{
name: "component healthy and unit healty",
coord: mockCoordinator{
isUp: true,
state: coordinator.State{
Components: []runtime.ComponentComponentState{
{
LegacyPID: "2",
State: runtime.ComponentState{
State: client.UnitStateHealthy,
Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{
{
UnitType: client.UnitTypeInput,
UnitID: "some-input-unit",
}: {
State: client.UnitStateHealthy,
},
},
},
Component: component.Component{
ID: "test-component",
InputSpec: &component.InputRuntimeSpec{
BinaryName: "testbeat",
},
},
},
},
},
},
expectedCode: 200,
failon: "failed",
},
}

// test with processesHandler
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License 2.0;
// you may not use this file except in compliance with the Elastic License 2.0.

package monitoringhelpers

import (
"github.com/elastic/elastic-agent-client/v7/pkg/client"
"github.com/elastic/elastic-agent/pkg/component/runtime"
)

// HaveState reports whether the given state is held by at least one of the
// supplied components, or by at least one unit belonging to any of them.
//
// For each component the component-level state is compared first and then the
// state of every unit in its Units map. The function returns true on the first
// match, and false when nothing matches (including when components is empty).
func HaveState(components []runtime.ComponentComponentState, state client.UnitState) bool {
	for i := range components {
		current := &components[i].State

		// The component itself may be in the requested state.
		if current.State == state {
			return true
		}

		// Otherwise any single unit in that state is enough.
		for key := range current.Units {
			if current.Units[key].State == state {
				return true
			}
		}
	}
	return false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License 2.0;
// you may not use this file except in compliance with the Elastic License 2.0.

package monitoringhelpers

import (
"testing"

"github.com/elastic/elastic-agent-client/v7/pkg/client"
"github.com/elastic/elastic-agent/pkg/component/runtime"
)

// TestComponentsHasState is a table-driven test for HaveState. Every case
// queries for client.UnitStateHealthy and checks whether it is found at the
// component level, at the unit level, or not at all.
func TestComponentsHasState(t *testing.T) {
	// inputUnit is the unit key shared by every case that declares a unit.
	inputUnit := runtime.ComponentUnitKey{
		UnitType: client.UnitTypeInput,
		UnitID:   "some-input-unit",
	}

	// withoutUnits builds a component in compState with an empty, non-nil unit map.
	withoutUnits := func(compState client.UnitState) runtime.ComponentComponentState {
		return runtime.ComponentComponentState{
			State: runtime.ComponentState{
				State: compState,
				Units: map[runtime.ComponentUnitKey]runtime.ComponentUnitState{},
			},
		}
	}

	// withUnit builds a component in compState that owns a single input unit
	// in unitState.
	withUnit := func(compState, unitState client.UnitState) runtime.ComponentComponentState {
		comp := withoutUnits(compState)
		comp.State.Units[inputUnit] = runtime.ComponentUnitState{State: unitState}
		return comp
	}

	type testCase struct {
		name       string
		components []runtime.ComponentComponentState
		state      client.UnitState
		expected   bool
	}

	cases := []testCase{
		{
			name: "component with no units matches state",
			components: []runtime.ComponentComponentState{
				withoutUnits(client.UnitStateHealthy),
			},
			state:    client.UnitStateHealthy,
			expected: true,
		},
		{
			name: "component with units in different state matches state",
			components: []runtime.ComponentComponentState{
				withUnit(client.UnitStateHealthy, client.UnitStateFailed),
			},
			state:    client.UnitStateHealthy,
			expected: true,
		},
		{
			name: "unit matches state",
			components: []runtime.ComponentComponentState{
				withUnit(client.UnitStateDegraded, client.UnitStateHealthy),
			},
			state:    client.UnitStateHealthy,
			expected: true,
		},
		{
			name: "no match in single component",
			components: []runtime.ComponentComponentState{
				withUnit(client.UnitStateDegraded, client.UnitStateStopped),
			},
			state:    client.UnitStateHealthy,
			expected: false,
		},
		{
			name: "match in second component",
			components: []runtime.ComponentComponentState{
				withoutUnits(client.UnitStateDegraded),
				withoutUnits(client.UnitStateHealthy),
			},
			state:    client.UnitStateHealthy,
			expected: true,
		},
		{
			name:       "empty components slice",
			components: []runtime.ComponentComponentState{},
			state:      client.UnitStateHealthy,
			expected:   false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := HaveState(tc.components, tc.state); got != tc.expected {
				t.Errorf("expected %v, got %v", tc.expected, got)
			}
		})
	}
}
Loading