Skip to content

Commit e503a25

Browse files
committed
Fix for PGO-2380:
Only add logrotate volume mounts to instance pod when backups are enabled. Add kuttl tests to ensure that collector will run on postgres instance when backups are disabled.
1 parent d7d2a1d commit e503a25

15 files changed

+283
-3
lines changed

internal/controller/postgrescluster/instance.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,11 +1218,12 @@ func (r *Reconciler) reconcileInstance(
12181218
}
12191219
}
12201220

1221-
// For now, we are not using logrotate to rotate postgres or patroni logs
1222-
// but we are using it for pgbackrest logs in the postgres pod
1221+
// For now, we are not using logrotate to rotate postgres or patroni logs,
1222+
// but we are using it for pgbackrest logs in the postgres pod, so we will
1223+
// set includeLogrotate to true, but only if backups are enabled.
12231224
collector.AddToPod(ctx, cluster.Spec.Instrumentation, cluster.Spec.ImagePullPolicy, instanceConfigMap, &instance.Spec.Template,
12241225
[]corev1.VolumeMount{postgres.DataVolumeMount()}, pgPassword,
1225-
[]string{naming.PGBackRestPGDataLogPath}, true, true)
1226+
[]string{naming.PGBackRestPGDataLogPath}, backupsSpecFound, true)
12261227
}
12271228

12281229
// Add postgres-exporter to the instance Pod spec
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/12--create-cluster.yaml
5+
assert:
6+
- files/12-cluster-created.yaml
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestAssert
3+
commands:
4+
# First, check that all containers in the instance pod are ready.
5+
# Then, grab the collector metrics output and check that a postgres
6+
# metric is present, as well as a patroni metric.
7+
# Then, check the collector logs for patroni, and postgres logs.
8+
# Finally, ensure the monitoring user exists and is configured.
9+
- script: |
10+
retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; }
11+
check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; }
12+
contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; }
13+
14+
pod=$(kubectl get pods -o name -n "${NAMESPACE}" \
15+
-l postgres-operator.crunchydata.com/cluster=otel-cluster-no-backups,postgres-operator.crunchydata.com/data=postgres)
16+
[ "$pod" = "" ] && retry "Pod not found" && exit 1
17+
18+
condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}")
19+
[ "$condition_json" = "" ] && retry "conditions not found" && exit 1
20+
{ check_containers_ready "$condition_json"; } || {
21+
retry "containers not ready"
22+
exit 1
23+
}
24+
25+
scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \
26+
curl --insecure --silent http://localhost:9187/metrics)
27+
{ contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || {
28+
retry "5 second metric not found"
29+
exit 1
30+
}
31+
{ contains "${scrape_metrics}" 'patroni_postgres_running'; } || {
32+
retry "patroni metric not found"
33+
exit 1
34+
}
35+
36+
logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope)
37+
{ contains "${logs}" 'InstrumentationScope patroni'; } || {
38+
retry "patroni logs not found"
39+
exit 1
40+
}
41+
{ contains "${logs}" 'InstrumentationScope postgres'; } || {
42+
retry "postgres logs not found"
43+
exit 1
44+
}
45+
46+
kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \
47+
-- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL'
48+
DO $$
49+
DECLARE
50+
result record;
51+
BEGIN
52+
SELECT * INTO result FROM pg_catalog.pg_roles WHERE rolname = 'ccp_monitoring';
53+
ASSERT FOUND, 'user not found';
54+
END $$
55+
SQL
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
apply:
4+
- files/14--add-backups.yaml
5+
assert:
6+
- files/14-backups-added.yaml
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
commands:
4+
- command: |-
5+
kubectl patch postgrescluster otel-cluster-no-backups --type 'merge' -p '{"spec":{"backups": null}}'
6+
namespaced: true
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
apiVersion: kuttl.dev/v1beta1
2+
kind: TestStep
3+
commands:
4+
- command: kubectl annotate postgrescluster otel-cluster-no-backups postgres-operator.crunchydata.com/authorizeBackupRemoval="true"
5+
namespaced: true
6+
assert:
7+
- files/16-backups-removed.yaml

testing/kuttl/e2e/otel-logging-and-metrics/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ This test assumes that the operator has both OpenTelemetryLogs and OpenTelemetry
2323
4. Add an `otlp` exporter to both PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors.
2424
1. Ensure that the ConfigMap, Service, and Deployment for the standalone OTel collector come up and that the collector container is running and ready.
2525
2. Assert that the standalone collector is receiving logs from all of our components (i.e. the standalone collector is getting logs for postgres, patroni, pgbackrest, pgbouncer, pgadmin, and gunicorn).
26+
5. Create a new cluster with `instrumentation` spec in place, but no `backups` spec to test the OTel features with optional backups.
27+
1. Ensure that the cluster comes up and the database and collector containers are running and ready.
28+
2. Add a backups spec to the new cluster and ensure that pgbackrest is added to the instance pod, a repo-host pod is created, and the collector runs on both pods.
29+
3. Remove the backups spec from the new cluster.
30+
4. Annotate the cluster to allow backups to be removed.
31+
5. Ensure that the repo-host pod is destroyed, pgbackrest is removed from the instance pod, and the collector continues to run on the instance pod.
2632

2733
### NOTES
2834

testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ metadata:
4646
labels:
4747
postgres-operator.crunchydata.com/data: pgbackrest
4848
postgres-operator.crunchydata.com/cluster: otel-cluster
49+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
4950
status:
5051
containerStatuses:
5152
- name: collector

testing/kuttl/e2e/otel-logging-and-metrics/files/08-custom-queries-added.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ metadata:
4646
labels:
4747
postgres-operator.crunchydata.com/data: pgbackrest
4848
postgres-operator.crunchydata.com/cluster: otel-cluster
49+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
4950
status:
5051
containerStatuses:
5152
- name: collector

testing/kuttl/e2e/otel-logging-and-metrics/files/10-logs-exporter-added.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ metadata:
4646
labels:
4747
postgres-operator.crunchydata.com/data: pgbackrest
4848
postgres-operator.crunchydata.com/cluster: otel-cluster
49+
postgres-operator.crunchydata.com/crunchy-otel-collector: "true"
4950
status:
5051
containerStatuses:
5152
- name: collector

0 commit comments

Comments
 (0)