Skip to content

Commit 906b41c

Browse files
Retry saving Agent information file (fleet.enc) on Windows (#9224) (#9507)
* Retry saving Agent information file on Windows * Implement retries on saving fleet.enc file * Remove previous implementation * Adding CHANGELOG file * Running mage fmt * Remove unnecessary changes (cherry picked from commit ead9249) Co-authored-by: Shaunak Kashyap <ycombinator@gmail.com>
1 parent c311243 commit 906b41c

File tree

5 files changed

+159
-9
lines changed

5 files changed

+159
-9
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: bug-fix
12+
13+
# Change summary; a 80ish characters long description of the change.
14+
summary: On Windows, retry saving the Agent information file to disk
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodate a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
description: >
20+
Saving the Agent information file involves renaming/moving a file to its final
21+
destination. However, on Windows, it is sometimes not possible to rename/move a
22+
file to its destination file because the destination file is locked by another
23+
process (e.g. antivirus software). For such situations, we now retry the save operation
24+
on Windows.
25+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
26+
component: elastic-agent
27+
28+
# PR URL; optional; the PR number that added the changeset.
29+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
30+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
31+
# Please provide it if you are adding a fragment for a different PR.
32+
pr: https://github.com/elastic/elastic-agent/pull/9224
33+
34+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
35+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
36+
issue: https://github.com/elastic/elastic-agent/issues/5862

internal/pkg/agent/application/actions/handlers/handler_action_policy_change.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ func (h *PolicyChangeHandler) handlePolicyChange(ctx context.Context, c *config.
318318
}
319319

320320
// persist configuration
321-
err = saveConfig(h.agentInfo, h.config, h.store)
321+
err = saveConfig(h.agentInfo, h.config, h.store, h.log)
322322
if err != nil {
323323
return fmt.Errorf("saving config: %w", err)
324324
}
@@ -406,7 +406,7 @@ func (h *PolicyChangeHandler) applyLoggingConfig(ctx context.Context, loggingCon
406406
return h.policyLogLevelSetter.SetLogLevel(ctx, policyLogLevel)
407407
}
408408

409-
func saveConfig(agentInfo info.Agent, validatedConfig *configuration.Configuration, store storage.Store) error {
409+
func saveConfig(agentInfo info.Agent, validatedConfig *configuration.Configuration, store storage.Store, log *logger.Logger) error {
410410
if validatedConfig == nil {
411411
// nothing to do for fleet hosts
412412
return nil
@@ -418,8 +418,7 @@ func saveConfig(agentInfo info.Agent, validatedConfig *configuration.Configurati
418418
errors.TypeUnexpected, errors.M("hosts", validatedConfig.Fleet.Client.Hosts))
419419
}
420420

421-
err = store.Save(reader)
422-
if err != nil {
421+
if err := saveConfigToStore(store, reader, log); err != nil {
423422
return errors.New(
424423
err, "fail to persist new Fleet Server API client hosts",
425424
errors.TypeFilesystem, errors.M("hosts", validatedConfig.Fleet.Client.Hosts))
@@ -475,7 +474,7 @@ func clientEqual(k1 remote.Config, k2 remote.Config) bool {
475474
return true
476475
}
477476

478-
func fleetToReader(agentID string, headers map[string]string, cfg *configuration.Configuration) (io.Reader, error) {
477+
func fleetToReader(agentID string, headers map[string]string, cfg *configuration.Configuration) (io.ReadSeeker, error) {
479478
configToStore := map[string]interface{}{
480479
"fleet": cfg.Fleet,
481480
"agent": map[string]interface{}{ // Add event logging configuration here!
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License 2.0;
3+
// you may not use this file except in compliance with the Elastic License 2.0.
4+
5+
//go:build !windows
6+
7+
package handlers
8+
9+
import (
10+
"io"
11+
12+
"github.com/elastic/elastic-agent/pkg/core/logger"
13+
14+
"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
15+
)
16+
17+
// saveConfigToStore saves the given configuration (reader) to the given store
18+
func saveConfigToStore(store storage.Store, reader io.Reader, _ *logger.Logger) error {
19+
return store.Save(reader)
20+
}

internal/pkg/agent/application/actions/handlers/handler_helpers_test.go

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,23 @@ package handlers
77
import (
88
"context"
99
"math"
10+
"math/rand/v2"
11+
"os"
12+
"path/filepath"
13+
"strings"
1014
"testing"
1115
"time"
1216

17+
"github.com/google/go-cmp/cmp"
18+
"github.com/google/go-cmp/cmp/cmpopts"
19+
"github.com/stretchr/testify/require"
20+
1321
"github.com/elastic/elastic-agent-client/v7/pkg/proto"
1422
"github.com/elastic/elastic-agent-libs/logp"
15-
1623
"github.com/elastic/elastic-agent/internal/pkg/agent/errors"
24+
"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
1725
"github.com/elastic/elastic-agent/pkg/component"
18-
19-
"github.com/google/go-cmp/cmp"
20-
"github.com/google/go-cmp/cmp/cmpopts"
26+
"github.com/elastic/elastic-agent/pkg/core/logger/loggertest"
2127
)
2228

2329
type testAction struct {
@@ -215,3 +221,36 @@ func TestProxiedActionsNotifier(t *testing.T) {
215221
})
216222
}
217223
}
224+
225+
func TestSaveConfigToStore(t *testing.T) {
226+
// Create destination file
227+
tmpDir := t.TempDir()
228+
dest := filepath.Join(tmpDir, "dest.txt")
229+
err := os.WriteFile(dest, []byte("existing content"), 0644)
230+
require.NoError(t, err)
231+
232+
// Open handle on destination file and keep it open for a random duration
233+
// between [200ms, 1800ms).
234+
openDuration := time.Duration(200+rand.Int64N(1800-200)) * time.Millisecond
235+
destFile, err := os.Open(dest)
236+
require.NoError(t, err)
237+
time.AfterFunc(openDuration, func() {
238+
destFile.Close()
239+
})
240+
defer destFile.Close()
241+
242+
// Create disk store
243+
store, err := storage.NewDiskStore(dest)
244+
require.NoError(t, err)
245+
246+
// Try to save content to store
247+
reader := strings.NewReader("new content")
248+
log, _ := loggertest.New("test")
249+
err = saveConfigToStore(store, reader, log)
250+
require.NoError(t, err)
251+
252+
// Check that dest file has been replaced with new file
253+
data, err := os.ReadFile(dest)
254+
require.NoError(t, err)
255+
require.Equal(t, "new content", string(data))
256+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License 2.0;
3+
// you may not use this file except in compliance with the Elastic License 2.0.
4+
5+
//go:build windows
6+
7+
package handlers
8+
9+
import (
10+
"context"
11+
"errors"
12+
"io"
13+
"syscall"
14+
"time"
15+
16+
"github.com/cenkalti/backoff/v4"
17+
18+
"github.com/elastic/elastic-agent/internal/pkg/agent/storage"
19+
"github.com/elastic/elastic-agent/pkg/core/logger"
20+
)
21+
22+
const saveRetryInterval = 50 * time.Millisecond
23+
const saveRetryDuration = 2 * time.Second
24+
25+
// saveConfigToStore saves the given configuration (reader) to the given store.
26+
// On Windows platforms, the save operation is retried if the error is an
27+
// ACCESS_DENIED error, which can happen if the file is locked by another process.
28+
func saveConfigToStore(store storage.Store, reader io.ReadSeeker, log *logger.Logger) error {
29+
ctx, cancel := context.WithTimeout(context.Background(), saveRetryDuration)
30+
defer cancel()
31+
32+
bo := backoff.WithContext(backoff.NewConstantBackOff(saveRetryInterval), ctx)
33+
34+
return backoff.Retry(func() error {
35+
err := store.Save(reader)
36+
if err == nil {
37+
// Save succeeded
38+
return nil
39+
}
40+
41+
if !errors.Is(err, syscall.ERROR_ACCESS_DENIED) {
42+
// Save failed due to an error that is not ACCESS_DENIED. Immediately
43+
// indicate failure without retrying further.
44+
log.Debugf("Saving configuration to store failed: %v. Not retrying.", err)
45+
return backoff.Permanent(err)
46+
}
47+
48+
if _, seekErr := reader.Seek(0, io.SeekStart); seekErr != nil {
49+
log.Debugf("Saving configuration to store failed: %v. Failed to reset reader: %v. Not retrying.", err, seekErr)
50+
return backoff.Permanent(errors.Join(err, seekErr))
51+
}
52+
53+
log.Debugf("Saving configuration to store failed: %v. Retrying...", err)
54+
return err
55+
}, bo)
56+
}

0 commit comments

Comments
 (0)