Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 93 additions & 72 deletions internal/controller/pgupgrade/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ import (
"github.com/crunchydata/postgres-operator/internal/feature"
"github.com/crunchydata/postgres-operator/internal/initialize"
"github.com/crunchydata/postgres-operator/internal/naming"
"github.com/crunchydata/postgres-operator/internal/postgres"
"github.com/crunchydata/postgres-operator/internal/shell"
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
)

// Upgrade job

// pgUpgradeJob returns the ObjectMeta for the pg_upgrade Job utilized to
// upgrade from one major PostgreSQL version to another
func pgUpgradeJob(upgrade *v1beta1.PGUpgrade) metav1.ObjectMeta {
Expand All @@ -48,20 +48,24 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s
oldVersion := spec.FromPostgresVersion
newVersion := spec.ToPostgresVersion

// if the fetch key command is set for TDE, provide the value during initialization
initdb := `/usr/pgsql-"${new_version}"/bin/initdb -k -D /pgdata/pg"${new_version}"`
var argEncryptionKeyCommand string
if fetchKeyCommand != "" {
initdb += ` --encryption-key-command "` + fetchKeyCommand + `"`
argEncryptionKeyCommand = ` --encryption-key-command=` + shell.QuoteWord(fetchKeyCommand)
}

args := []string{fmt.Sprint(oldVersion), fmt.Sprint(newVersion)}
script := strings.Join([]string{
// Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable.
`shopt -so errexit nounset`,

`declare -r data_volume='/pgdata' old_version="$1" new_version="$2"`,
`printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@"`,
`printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@"`,
`section() { printf '\n\n%s\n' "$@"; }`,

// Note: Rather than import the nss_wrapper init container, as we do in
// the main postgres-operator, this job does the required nss_wrapper
// NOTE: Rather than import the nss_wrapper init container, as we do in
// the PostgresCluster controller, this job does the required nss_wrapper
// settings here.
`section 'Step 1 of 7: Ensuring username is postgres...'`,

// Create a copy of the system group definitions, but remove the "postgres"
// group or any group with the current GID. Replace them with our own that
Expand All @@ -80,53 +84,71 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s
// Enable nss_wrapper so the current UID and GID resolve to "postgres".
// - https://cwrap.org/nss_wrapper.html
`export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD`,
`id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]`,

`section 'Step 2 of 7: Finding data and tools...'`,
`old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]]`,
`new_data="${data_volume}/pg${new_version}"`,

// Search for Postgres executables matching the old and new versions.
// Use `command -v` to look through all of PATH, then trim the executable name from the absolute path.
`old_bin=$(` + postgres.ShellPath(oldVersion) + ` && command -v postgres)`,
`old_bin="${old_bin%/postgres}"`,
`new_bin=$(` + postgres.ShellPath(newVersion) + ` && command -v pg_upgrade)`,
`new_bin="${new_bin%/pg_upgrade}"`,

// The executables found might not be the versions we need, so do a cursory check before writing to disk.
// pg_upgrade checks every executable thoroughly since PostgreSQL v14.
//
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/bin/pg_upgrade/exec.c#l355
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_14_0;f=src/bin/pg_upgrade/exec.c#l358
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_18_0;f=src/bin/pg_upgrade/exec.c#l370
`(set -x && [[ "$("${old_bin}/postgres" --version)" =~ ") ${old_version}"($|[^0-9]) ]])`,
`(set -x && [[ "$("${new_bin}/initdb" --version)" =~ ") ${new_version}"($|[^0-9]) ]])`,

// pg_upgrade writes its files in "${new_data}/pg_upgrade_output.d" since PostgreSQL v15.
// Change to a writable working directory to be compatible with PostgreSQL v14 and earlier.
//
// https://www.postgresql.org/docs/release/15#id-1.11.6.20.5.11.3
`cd "${data_volume}"`,

// Below is the pg_upgrade script used to upgrade a PostgresCluster from
// one major version to another. Additional information concerning the
// steps used and command flag specifics can be found in the documentation:
// - https://www.postgresql.org/docs/current/pgupgrade.html

// To begin, we first move to the mounted /pgdata directory and create a
// new version directory which is then initialized with the initdb command.
`cd /pgdata || exit`,
`echo -e "Step 1: Making new pgdata directory...\n"`,
`mkdir /pgdata/pg"${new_version}"`,
`echo -e "Step 2: Initializing new pgdata directory...\n"`,
initdb,

// Before running the upgrade check, which ensures the clusters are compatible,
// proper permissions have to be set on the old pgdata directory and the
// preload library settings must be copied over.
`echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n"`,
`chmod 750 /pgdata/pg"${old_version}"`,
`echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n"`,
`echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \`,
`/pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf`,

// Before the actual upgrade is run, we will run the upgrade --check to
// verify everything before actually changing any data.
`echo -e "Step 5: Running pg_upgrade check...\n"`,
`time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`,
`--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\`,
` --new-datadir /pgdata/pg"${new_version}" --check` + argMethod + argJobs,

// Assuming the check completes successfully, the pg_upgrade command will
// be run that actually prepares the upgraded pgdata directory.
`echo -e "\nStep 6: Running pg_upgrade...\n"`,
`time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`,
`--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \`,
`--new-datadir /pgdata/pg"${new_version}"` + argMethod + argJobs,

// Since we have cleared the Patroni cluster step by removing the EndPoints, we copy patroni.dynamic.json
// from the old data dir to help retain PostgreSQL parameters you had set before.
// - https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version
`echo -e "\nStep 7: Copying patroni.dynamic.json...\n"`,
`cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}"`,

`echo -e "\npg_upgrade Job Complete!"`,
`section 'Step 3 of 7: Initializing new data directory...'`,
`PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums` + argEncryptionKeyCommand,

// Read the configured value then quote it; every single-quote U+0027 is replaced by two.
//
// https://www.postgresql.org/docs/current/config-setting.html
// https://www.gnu.org/software/bash/manual/bash.html#ANSI_002dC-Quoting
`section 'Step 4 of 7: Copying shared_preload_libraries parameter...'`,
`value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)`,
`echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"`,

// NOTE: The default for --new-bindir is the directory of pg_upgrade since PostgreSQL v13.
//
// https://www.postgresql.org/docs/release/13#id-1.11.6.28.5.11
`section 'Step 5 of 7: Checking for potential issues...'`,
`"${new_bin}/pg_upgrade" --check` + argMethod + argJobs + ` \`,
`--old-bindir="${old_bin}" --old-datadir="${old_data}" \`,
`--new-bindir="${new_bin}" --new-datadir="${new_data}"`,

`section 'Step 6 of 7: Performing upgrade...'`,
`(set -x && time "${new_bin}/pg_upgrade"` + argMethod + argJobs + ` \`,
`--old-bindir="${old_bin}" --old-datadir="${old_data}" \`,
`--new-bindir="${new_bin}" --new-datadir="${new_data}")`,

// https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version
`section 'Step 7 of 7: Copying Patroni settings...'`,
`(set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}")`,

`section 'Success!'`,
}, "\n")

return append([]string{"bash", "-ceu", "--", script, "upgrade"}, args...)
return append([]string{"bash", "-c", "--", script, "upgrade"}, args...)
}

// largestWholeCPU returns the maximum CPU request or limit as a non-negative
Expand Down Expand Up @@ -232,38 +254,37 @@ func (r *PGUpgradeReconciler) generateUpgradeJob(
// We currently target the `pgdata/pg{old_version}` and `pgdata/pg{old_version}_wal`
// directories for removal.
func removeDataCommand(upgrade *v1beta1.PGUpgrade) []string {
oldVersion := fmt.Sprint(upgrade.Spec.FromPostgresVersion)
oldVersion := upgrade.Spec.FromPostgresVersion

// Before removing the directories (both data and wal), we check that
// the directory is not in use by running `pg_controldata` and making sure
// the server state is "shut down in recovery"
// TODO(benjaminjb): pg_controldata seems pretty stable, but might want to
// experiment with a few more versions.
args := []string{oldVersion}
args := []string{fmt.Sprint(oldVersion)}
script := strings.Join([]string{
`declare -r old_version="$1"`,
`printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@"`,
`echo -e "Checking the directory exists and isn't being used...\n"`,
`cd /pgdata || exit`,
// The string `shut down in recovery` is the dbstate that postgres sets from
// at least version 10 to 14 when a replica has been shut down.
// - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_controldata/pg_controldata.c;h=f911f98d946d83f1191abf35239d9b4455c5f52a;hb=HEAD#l59
// Note: `pg_controldata` is actually used by `pg_upgrade` before upgrading
// to make sure that the server in question is shut down as a primary;
// that aligns with our use here, where we're making sure that the server in question
// was shut down as a replica.
// - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_upgrade/controldata.c;h=41b8f69b8cbe4f40e6098ad84c2e8e987e24edaf;hb=HEAD#l122
`if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi`,
`echo -e "Removing old pgdata directory...\n"`,
// When deleting the wal directory, use `realpath` to resolve the symlink from
// the pgdata directory. This is necessary because the wal directory can be
// mounted at different places depending on if an external wal PVC is used,
// i.e. `/pgdata/pg14_wal` vs `/pgwal/pg14_wal`
`rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)"`,
`echo -e "Remove Data Job Complete!"`,
// Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable.
`shopt -so errexit nounset`,

`declare -r data_volume='/pgdata' old_version="$1"`,
`printf 'Removing PostgreSQL %s data...\n\n' "$@"`,
`delete() (set -x && rm -rf -- "$@")`,

`old_data="${data_volume}/pg${old_version}"`,
`control=$(` + postgres.ShellPath(oldVersion) + ` && LC_ALL=C pg_controldata "${old_data}")`,
`read -r state <<< "${control##*cluster state:}"`,

// We expect exactly one state for a replica that has been stopped.
//
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/bin/pg_controldata/pg_controldata.c#l55
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/bin/pg_controldata/pg_controldata.c#l58
`[[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; }`,

// "rm" does not follow symbolic links.
// Delete the old data directory after subdirectories that contain versioned data.
`delete "${old_data}/pg_wal/"`,
`delete "${old_data}" && echo 'Success!'`,
}, "\n")

return append([]string{"bash", "-ceu", "--", script, "remove"}, args...)
return append([]string{"bash", "-c", "--", script, "remove"}, args...)
}

// generateRemoveDataJob returns a Job that can remove the data
Expand Down
81 changes: 46 additions & 35 deletions internal/controller/pgupgrade/jobs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func TestUpgradeCommand(t *testing.T) {
spec := &v1beta1.PGUpgradeSettings{Jobs: tt.Spec}
command := upgradeCommand(spec, "")
assert.Assert(t, len(command) > 3)
assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3])
assert.DeepEqual(t, []string{"bash", "-c", "--"}, command[:3])

script := command[3]
assert.Assert(t, cmp.Contains(script, tt.Args))
Expand All @@ -111,7 +111,7 @@ func TestUpgradeCommand(t *testing.T) {
spec := &v1beta1.PGUpgradeSettings{TransferMethod: tt.Spec}
command := upgradeCommand(spec, "")
assert.Assert(t, len(command) > 3)
assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3])
assert.DeepEqual(t, []string{"bash", "-c", "--"}, command[:3])

script := command[3]
assert.Assert(t, cmp.Contains(script, tt.Args))
Expand Down Expand Up @@ -196,39 +196,48 @@ spec:
containers:
- command:
- bash
- -ceu
- -c
- --
- |-
shopt -so errexit nounset
declare -r data_volume='/pgdata' old_version="$1" new_version="$2"
printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@"
printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@"
section() { printf '\n\n%s\n' "$@"; }
section 'Step 1 of 7: Ensuring username is postgres...'
gid=$(id -G); NSS_WRAPPER_GROUP=$(mktemp)
(sed "/^postgres:x:/ d; /^[^:]*:x:${gid%% *}:/ d" /etc/group
echo "postgres:x:${gid%% *}:") > "${NSS_WRAPPER_GROUP}"
uid=$(id -u); NSS_WRAPPER_PASSWD=$(mktemp)
(sed "/^postgres:x:/ d; /^[^:]*:x:${uid}:/ d" /etc/passwd
echo "postgres:x:${uid}:${gid%% *}::${data_volume}:") > "${NSS_WRAPPER_PASSWD}"
export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD
cd /pgdata || exit
echo -e "Step 1: Making new pgdata directory...\n"
mkdir /pgdata/pg"${new_version}"
echo -e "Step 2: Initializing new pgdata directory...\n"
/usr/pgsql-"${new_version}"/bin/initdb -k -D /pgdata/pg"${new_version}"
echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n"
chmod 750 /pgdata/pg"${old_version}"
echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n"
echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \
/pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf
echo -e "Step 5: Running pg_upgrade check...\n"
time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \
--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\
--new-datadir /pgdata/pg"${new_version}" --check --link --jobs=1
echo -e "\nStep 6: Running pg_upgrade...\n"
time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \
--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \
--new-datadir /pgdata/pg"${new_version}" --link --jobs=1
echo -e "\nStep 7: Copying patroni.dynamic.json...\n"
cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}"
echo -e "\npg_upgrade Job Complete!"
id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]
section 'Step 2 of 7: Finding data and tools...'
old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]]
new_data="${data_volume}/pg${new_version}"
old_bin=$(PATH="/usr/lib/postgresql/19/bin:/usr/libexec/postgresql19:/usr/pgsql-19/bin${PATH+:${PATH}}" && command -v postgres)
old_bin="${old_bin%/postgres}"
new_bin=$(PATH="/usr/lib/postgresql/25/bin:/usr/libexec/postgresql25:/usr/pgsql-25/bin${PATH+:${PATH}}" && command -v pg_upgrade)
new_bin="${new_bin%/pg_upgrade}"
(set -x && [[ "$("${old_bin}/postgres" --version)" =~ ") ${old_version}"($|[^0-9]) ]])
(set -x && [[ "$("${new_bin}/initdb" --version)" =~ ") ${new_version}"($|[^0-9]) ]])
cd "${data_volume}"
section 'Step 3 of 7: Initializing new data directory...'
PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums
section 'Step 4 of 7: Copying shared_preload_libraries parameter...'
value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)
echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"
section 'Step 5 of 7: Checking for potential issues...'
"${new_bin}/pg_upgrade" --check --link --jobs=1 \
--old-bindir="${old_bin}" --old-datadir="${old_data}" \
--new-bindir="${new_bin}" --new-datadir="${new_data}"
section 'Step 6 of 7: Performing upgrade...'
(set -x && time "${new_bin}/pg_upgrade" --link --jobs=1 \
--old-bindir="${old_bin}" --old-datadir="${old_data}" \
--new-bindir="${new_bin}" --new-datadir="${new_data}")
section 'Step 7 of 7: Copying Patroni settings...'
(set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}")
section 'Success!'
- upgrade
- "19"
- "25"
Expand Down Expand Up @@ -263,7 +272,7 @@ status: {}

tdeJob := reconciler.generateUpgradeJob(ctx, upgrade, startup, "echo testKey")
assert.Assert(t, cmp.MarshalContains(tdeJob,
`/usr/pgsql-"${new_version}"/bin/initdb -k -D /pgdata/pg"${new_version}" --encryption-key-command "echo testKey"`))
`PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums --encryption-key-command='echo testKey'`))
}

func TestGenerateRemoveDataJob(t *testing.T) {
Expand Down Expand Up @@ -339,17 +348,19 @@ spec:
containers:
- command:
- bash
- -ceu
- -c
- --
- |-
declare -r old_version="$1"
printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@"
echo -e "Checking the directory exists and isn't being used...\n"
cd /pgdata || exit
if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi
echo -e "Removing old pgdata directory...\n"
rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)"
echo -e "Remove Data Job Complete!"
shopt -so errexit nounset
declare -r data_volume='/pgdata' old_version="$1"
printf 'Removing PostgreSQL %s data...\n\n' "$@"
delete() (set -x && rm -rf -- "$@")
old_data="${data_volume}/pg${old_version}"
control=$(PATH="/usr/lib/postgresql/19/bin:/usr/libexec/postgresql19:/usr/pgsql-19/bin${PATH+:${PATH}}" && LC_ALL=C pg_controldata "${old_data}")
read -r state <<< "${control##*cluster state:}"
[[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; }
delete "${old_data}/pg_wal/"
delete "${old_data}" && echo 'Success!'
- remove
- "19"
image: img4
Expand Down
Loading
Loading