diff options
| -rw-r--r-- | tests/core/snapd-refresh-vs-services-reboots/task.yaml | 136 | ||||
| -rwxr-xr-x | tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/bin/svc.sh | 14 | ||||
| -rw-r--r-- | tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/meta/snap.yaml | 9 | ||||
| -rw-r--r-- | tests/core/snapd-refresh-vs-services/task.yaml | 212 | ||||
| -rwxr-xr-x | tests/lib/mock-shutdown | 7 | ||||
| -rwxr-xr-x | tests/lib/snaps/test-snapd-simple-service/bin/service (renamed from tests/regression/lp-1813963/test-snapd-simple-service/bin/service) | 0 | ||||
| -rw-r--r-- | tests/lib/snaps/test-snapd-simple-service/meta/snap.yaml (renamed from tests/regression/lp-1813963/test-snapd-simple-service/meta/snap.yaml) | 0 | ||||
| -rw-r--r-- | tests/lib/uc20-recovery.sh | 12 |
8 files changed, 386 insertions, 4 deletions
diff --git a/tests/core/snapd-refresh-vs-services-reboots/task.yaml b/tests/core/snapd-refresh-vs-services-reboots/task.yaml new file mode 100644 index 0000000000..55b3f6c42e --- /dev/null +++ b/tests/core/snapd-refresh-vs-services-reboots/task.yaml @@ -0,0 +1,136 @@ +summary: Check that refreshing snapd in the worst case reboots if things go sideways + +# TODO: move this test to tests/regression/lp-1924805 ? + +systems: [ubuntu-core-18-*, ubuntu-core-20-*] + +environment: + # the test needs to start from 2.49.2 to reproduce the bug and demonstrate the + # worst case behavior still works + SNAPD_2_49_2_X86: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11588.snap + SNAPD_2_49_2_ARM64: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11584.snap + SNAPD_2_49_2_ARMHF: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11586.snap + +prepare: | + snap install http --devmode # devmode so it can save to any dir + + # save the version of snapd from the PR to refresh to later + INITIAL_REV=$(snap list snapd | tail -n +2 | awk '{print $3}') + cp "/var/lib/snapd/snaps/snapd_$INITIAL_REV.snap" snapd-pr.snap + + # download and install snapd 2.49.2 + if os.query is-pc-amd64; then + http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_X86" + elif os.query is-arm64; then + http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_ARM64" + elif os.query is-armhf; then + http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_ARMHF" + else + echo "architecture not supported for this variant" + exit 0 + fi + snap install --dangerous snapd_2.49.2.snap + tests.cleanup defer snap revert snapd --revision="$INITIAL_REV" + snap version | MATCH 2.49.2 + +restore: | + # We need special restore code here for the snapd snap because of the multiple + # variants of this test and existing subtle bugs in our restore code. 
The issue + # is that when we install a new revision of the snapd snap dangerously without + # store assertions, at the + # end of the test to ensure that other tests use the snapd version that we + # started with, we revert the snapd snap to the original revision without + # triggering a garbage collection of the revisions we reverted away from (i.e. + # the new revisions we installed as part of this test execution). This is + # problematic because it leaves those old revisions mounted at /snap/snapd/x2 + # for example and now at the end of the test the active revision of snapd is + # x1. Then during the next test execution that tries to install a dangerous + # local revision of snapd, it will have lost any reference to the previous x2 + # revision (since we clear state.json at the end of the test execution in + # reset.sh), and now snapd is trying to install and make active x2 again, and + # it will copy all the right files, but when it comes time to mount the new x2 + # revision of snapd, there will already be an existing active mount unit for + # the previous revision at /snap/snapd/x2, and so the bits of code that try + # to mount the .snap file there will effectively just silently fail since there + # is already a snapd snap mounted there, it is just the wrong one mounted + # there. + + # We remedy this here, temporarily, by first manually executing all cleanups + # that were deferred, since one of those deferred cleanups is likely the + # revert to the previous revision of the snapd snap, and then we manually + # remove all disabled revisions of the snapd snap - this manual removal will + # in fact unmount the mount units for /snap/snapd/x2 for example, avoiding the + # bug. + + tests.cleanup restore + for rev in $(snap list snapd --all | grep disabled | awk '{print $3}'); do + snap remove snapd --revision="$rev" + done + +execute: | + if ! os.query is-pc-amd64 && ! 
os.query is-arm; then + echo "architecture not supported for this variant" + exit 0 + fi + + if [ "$SPREAD_REBOOT" = "0" ]; then + # install the special service snap which will fail to startup every other time + # it is started + snap pack test-snapd-svc-flip-flop --filename=app.snap + + # install the snap so it is running + snap install --dangerous app.snap + + # ensure that the service is active and has a running PID + snap services | MATCH 'test-snapd-svc-flip-flop\.svc1\s+enabled\s+active.*' + + # ensure that the service was installed with Requires= which triggers the bug + MATCH Requires=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-svc-flip-flop.svc1.service + + # mock the reboot command so that when we try to refresh snapd and it fails, + # we can observe this and reboot with spread instead + + # TODO: make this a common helper for mocking expected snapd reboots, this + # code is shared with uc20-recovery.sh right now + + # save the original systemctl command since we essentially need to mock it + cp /bin/systemctl /tmp/orig-systemctl + + mount -o bind "$TESTSLIB/mock-shutdown" /bin/systemctl + tests.cleanup defer umount /bin/systemctl + + # now refresh to the new version of snapd - this will trigger a reboot of + # the system + snap install --dangerous snapd-pr.snap 2>&1 | MATCH "snapd is about to reboot the system" + + # snapd schedules a slow timeout and an immediate one, however it is + # scheduled asynchronously, try to keep the check simple + # shellcheck disable=SC2016 + retry -n 30 --wait 1 sh -c 'test "$(wc -l < /tmp/mock-shutdown.calls)" = "2"' + # a reboot in 10 minutes should have been scheduled + MATCH -- '-r \+10' < /tmp/mock-shutdown.calls + # and an immediate reboot should have been scheduled + MATCH -- '-r \+0' < /tmp/mock-shutdown.calls + + # check that before shutting down, snapd rewrote the unit service file to + # contain Wants= instead of Requires= + MATCH Wants=usr-lib-snapd.mount < 
/etc/systemd/system/snap.test-snapd-svc-flip-flop.svc1.service + + # and the service should not be running anymore + not systemctl is-active snap.test-snapd-svc-flip-flop.svc1.service + + # restore shutdown so that spread can reboot the host + tests.cleanup pop + + REBOOT + fi + + # we rebooted after installing the new snapd as expected, make sure that the + # service is active and that the snap change finishes successfully + + snap watch --last=install + snap changes snapd | NOMATCH Error + snap changes snapd | NOMATCH Undone + snap changes snapd | NOMATCH Hold + + snap services | MATCH 'test-snapd-svc-flip-flop\.svc1\s+enabled\s+active.*' diff --git a/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/bin/svc.sh b/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/bin/svc.sh new file mode 100755 index 0000000000..ebc18ac081 --- /dev/null +++ b/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/bin/svc.sh @@ -0,0 +1,14 @@ +#!/bin/bash -ex + + +# if the file exists, then we don't fork, we just sleep forever +if [ -f "$SNAP_DATA/prevent-start" ]; then + rm -rf "$SNAP_DATA/prevent-start" + sleep infinity +fi + +# otherwise create the file and fork a process and then exit +sleep infinity & +touch "$SNAP_DATA/prevent-start" + +exit 0 diff --git a/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/meta/snap.yaml b/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/meta/snap.yaml new file mode 100644 index 0000000000..c476bba4ec --- /dev/null +++ b/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/meta/snap.yaml @@ -0,0 +1,9 @@ +name: test-snapd-svc-flip-flop +version: "0.1" + +apps: + svc1: + command: bin/svc.sh + daemon: forking + # short start-timeout so the test finishes quickly + start-timeout: 5s diff --git a/tests/core/snapd-refresh-vs-services/task.yaml b/tests/core/snapd-refresh-vs-services/task.yaml new file mode 100644 index 0000000000..05d5af6585 
--- /dev/null +++ b/tests/core/snapd-refresh-vs-services/task.yaml @@ -0,0 +1,212 @@ +summary: Check that refreshing snapd does not interfere with snap services + +# TODO: we should also run it on classic later + +systems: [ubuntu-core-18-*, ubuntu-core-20-*] + +environment: + SNAPD_VERSION_UNDER_TEST/start_w_pr: pr + SNAPD_VERSION_UNDER_TEST/start_w_stable: stable + SNAPD_VERSION_UNDER_TEST/start_w_2_49_2: "2.49.2" + + # links to specific snapd versions + + SNAPD_2_49_1_X86: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.1_11402.snap + SNAPD_2_49_1_ARM64: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.1_11408.snap + SNAPD_2_49_1_ARMHF: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.1_11410.snap + + SNAPD_2_49_2_X86: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11588.snap + SNAPD_2_49_2_ARM64: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11584.snap + SNAPD_2_49_2_ARMHF: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11586.snap + +prepare: | + # install http snap to download files, jq + remarshal to simplify the check if + # stable == 2.49.2 so we can skip that case automatically until a new version + # is released to stable + snap install http --devmode # devmode so it can save to any dir + snap install jq remarshal + # save the current version of snapd for later + INITIAL_REV=$(snap list snapd | tail -n +2 | awk '{print $3}') + cp "/var/lib/snapd/snaps/snapd_$INITIAL_REV.snap" snapd-pr.snap + + snap set system experimental.parallel-instances=true + tests.cleanup defer snap unset system experimental.parallel-instances + + # keep around all the snapd snap revisions we will use in the test so that we + # can always easily revert back to the one at the end of the test + snap set system refresh.retain=5 + tests.cleanup defer snap unset system refresh.retain + +restore: | + # We need special restore code here for the snapd snap because of the 
multiple + # variants of this test and existing subtle bugs in our restore code. The issue + # is that when we install a new revision of the snapd snap dangerously without + # store assertions, at the + # end of the test to ensure that other tests use the snapd version that we + # started with, we revert the snapd snap to the original revision without + # triggering a garbage collection of the revisions we reverted away from (i.e. + # the new revisions we installed as part of this test execution). This is + # problematic because it leaves those old revisions mounted at /snap/snapd/x2 + # for example and now at the end of the test the active revision of snapd is + # x1. Then during the next test execution that tries to install a dangerous + # local revision of snapd, it will have lost any reference to the previous x2 + # revision (since we clear state.json at the end of the test execution in + # reset.sh), and now snapd is trying to install and make active x2 again, and + # it will copy all the right files, but when it comes time to mount the new x2 + # revision of snapd, there will already be an existing active mount unit for + # the previous revision at /snap/snapd/x2, and so the bits of code that try + # to mount the .snap file there will effectively just silently fail since there + # is already a snapd snap mounted there, it is just the wrong one mounted + # there. + + # We remedy this here, temporarily, by first manually executing all cleanups + # that were deferred, since one of those deferred cleanups is likely the + # revert to the previous revision of the snapd snap, and then we manually + # remove all disabled revisions of the snapd snap - this manual removal will + # in fact unmount the mount units for /snap/snapd/x2 for example, avoiding the + # bug. 
+ + tests.cleanup restore + for rev in $(snap list snapd --all | grep disabled | awk '{print $3}'); do + snap remove snapd --revision="$rev" + done + +execute: | + # check if snapd 2.49.2 is the current latest/stable release as it simplifies + # some of the logic below + if snap info snapd | yaml2json | jq -r '.channels."latest/stable"' | grep -q -Po '2.49.2\s+'; then + # skip the stable variant of the test + if [ "${SNAPD_VERSION_UNDER_TEST}" = "stable" ]; then + echo "Skipping duplicated test case" + exit 0 + fi + fi + + if ! os.query is-pc-amd64 && ! os.query is-arm; then + echo "architecture not supported for this variant" + exit 0 + fi + + echo "Ensure that the system is fully seeded" + snap changes | MATCH "Done.*Initialize system state" + + INITIAL_REV=$(snap list snapd | tail -n +2 | awk '{print $3}') + + # first thing is to install snapd 2.49.1 before the Requires= change was + # introduced so we can install a snap service that will not have Requires= in + # it + if os.query is-pc-amd64; then + http --quiet --download --output snapd_2.49.1.snap GET "$SNAPD_2_49_1_X86" + elif os.query is-arm64; then + http --quiet --download --output snapd_2.49.1.snap GET "$SNAPD_2_49_1_ARM64" + elif os.query is-armhf; then + http --quiet --download --output snapd_2.49.1.snap GET "$SNAPD_2_49_1_ARMHF" + fi + + snap install --dangerous snapd_2.49.1.snap + snap version | MATCH 2.49.1 + + # always go back to the original revision from the pr at the end of the test + tests.cleanup defer snap revert snapd --revision="$INITIAL_REV" + + echo "Install a service from snapd 2.49.1 to have one without Requires= in it" + "$TESTSTOOLS"/snaps-state install-local test-snapd-simple-service + + # check that it is initially active + snap services|MATCH ".*test-snapd-simple-service\s*enabled\s*active.*" + + # check that it doesn't have any dependencies on usr-lib-snapd.mount at the + # start + NOMATCH Requires=usr-lib-snapd.mount < 
/etc/systemd/system/snap.test-snapd-simple-service.test-snapd-simple-service.service + NOMATCH Wants=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service.test-snapd-simple-service.service + + # now refresh to the variant of the test + if [ "${SNAPD_VERSION_UNDER_TEST}" = "stable" ]; then + echo "Refreshing snapd to stable" + snap refresh --amend --channel=latest/stable snapd + elif [ "${SNAPD_VERSION_UNDER_TEST}" = "2.49.2" ]; then + # download and install snapd 2.49.2 + if os.query is-pc-amd64; then + http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_X86" + elif os.query is-arm64; then + http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_ARM64" + elif os.query is-armhf; then + http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_ARMHF" + fi + + echo "Refreshing snapd to 2.49.2" + snap install --dangerous snapd_2.49.2.snap + snap version | MATCH 2.49.2 + + elif [ "${SNAPD_VERSION_UNDER_TEST}" = "pr" ]; then + # refresh back to the version we originally had from before the test + # started + echo "Refreshing snapd to version from the pr" + snap install --dangerous snapd-pr.snap + fi + + # now install another service that will either get Requires= or Wants= for + # usr-lib-snapd.mount, depending on the variant of the test + "$TESTSTOOLS"/snaps-state install-local-as test-snapd-simple-service test-snapd-simple-service_alt + + # check that it is still initially active + snap services|MATCH ".*test-snapd-simple-service_alt.test-snapd-simple-service\s*enabled\s*active.*" + + # if we are not running with 2.49.2, then the second service should have been + # generated with Wants=, but if we are doing 2.49.2 then the second service + # should have Requires= in it + if [ "${SNAPD_VERSION_UNDER_TEST}" = "2.49.2" ];then + MATCH Requires=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service_alt.test-snapd-simple-service.service + else + MATCH Wants=usr-lib-snapd.mount < 
/etc/systemd/system/snap.test-snapd-simple-service_alt.test-snapd-simple-service.service + fi + + # now repack current snapd to refresh to it again and observe the results + unsquashfs -d ./snapd-unpacked snapd-pr.snap + snap pack --filename=snapd_repacked.snap snapd-unpacked + + # save the PIDs of the services + systemctl show -p MainPID snap.test-snapd-simple-service.test-snapd-simple-service > old-main.pid + systemctl show -p MainPID snap.test-snapd-simple-service_alt.test-snapd-simple-service > old-main_alt.pid + + echo "Refresh snapd" + snap install --dangerous snapd_repacked.snap + + # for all variants of the test, the main.pid should be the same since it did + # not ever contain the Requires= + systemctl show -p MainPID snap.test-snapd-simple-service.test-snapd-simple-service > new-main.pid + + if [ "$(cat new-main.pid)" != "$(cat old-main.pid)" ]; then + echo "The service without Requires= was restarted; test is broken" + exit 1 + fi + + # for the variants of the test that have the fix in them, that is stable and + # pr, then we also should not have had the alt service pid change due to a + # restart + # for the other variant (2.49.2 only), we unfortunately ended up needing to + # restart the service so it should have a different PID, but it should be + # running again + + systemctl show -p MainPID snap.test-snapd-simple-service_alt.test-snapd-simple-service > new-main_alt.pid + if [ "${SNAPD_VERSION_UNDER_TEST}" = "2.49.2" ]; then + if [ "$(cat new-main_alt.pid)" = "$(cat old-main_alt.pid)" ]; then + echo "Somehow the service was not killed as expected ... 
test is probably broken" + exit 1 + fi + else + if [ "$(cat new-main_alt.pid)" != "$(cat old-main_alt.pid)" ]; then + echo "The service with Wants= was unexpectedly killed; test is broken" + exit 1 + fi + fi + + # in all cases both services should be active after the refresh + echo "Check services were kept active" + snap services|MATCH ".*test-snapd-simple-service\s*enabled\s*active.*" + snap services|MATCH ".*test-snapd-simple-service_alt.test-snapd-simple-service\s*enabled\s*active.*" + + # and both services should have Wants= now + echo "Check services were re-written to use Wants=usr-lib-snapd.mount now" + MATCH Wants=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service_alt.test-snapd-simple-service.service + MATCH Wants=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service.test-snapd-simple-service.service diff --git a/tests/lib/mock-shutdown b/tests/lib/mock-shutdown index 8681f07f81..2e9bad0590 100755 --- a/tests/lib/mock-shutdown +++ b/tests/lib/mock-shutdown @@ -5,6 +5,9 @@ # that would show up in the mock-shutdown.calls and confuse the test if [ "$1" = "-r" ]; then echo "$*" >> /tmp/mock-shutdown.calls + exit 0 +else + # not the shutdown command so we need to execute the original systemctl + # which needs to have been copied over to this location + exec /tmp/orig-systemctl "$@" fi - -exit 0 \ No newline at end of file diff --git a/tests/regression/lp-1813963/test-snapd-simple-service/bin/service b/tests/lib/snaps/test-snapd-simple-service/bin/service index 29754c4b97..29754c4b97 100755 --- a/tests/regression/lp-1813963/test-snapd-simple-service/bin/service +++ b/tests/lib/snaps/test-snapd-simple-service/bin/service diff --git a/tests/regression/lp-1813963/test-snapd-simple-service/meta/snap.yaml b/tests/lib/snaps/test-snapd-simple-service/meta/snap.yaml index f226fcbe1f..f226fcbe1f 100644 --- a/tests/regression/lp-1813963/test-snapd-simple-service/meta/snap.yaml +++ b/tests/lib/snaps/test-snapd-simple-service/meta/snap.yaml 
diff --git a/tests/lib/uc20-recovery.sh b/tests/lib/uc20-recovery.sh index cce8fe7a04..277d0e3ab6 100644 --- a/tests/lib/uc20-recovery.sh +++ b/tests/lib/uc20-recovery.sh @@ -10,9 +10,17 @@ transition_to_recover_mode(){ HAVE_LABEL=0 fi + # TODO: the following mocking of systemctl should be combined with the code + # in tests/core/snapd-refresh-vs-services-reboots into a generic shutdown + # helper to get better observability and less race conditions around + # snapd rebooting things from a live system under spread + + # save the original systemctl command since we essentially need to mock it + cp /bin/systemctl /tmp/orig-systemctl + # redirect shutdown command to our mock to observe calls and avoid racing # with spread - mount -o bind "$TESTSLIB/mock-shutdown" /usr/sbin/shutdown + mount -o bind "$TESTSLIB/mock-shutdown" /bin/systemctl # reboot to recovery mode echo "Request rebooting into recovery mode" @@ -32,7 +40,7 @@ transition_to_recover_mode(){ MATCH -- '-r \+0' < /tmp/mock-shutdown.calls # restore shutdown so that spread can reboot the host - umount /usr/sbin/shutdown + umount /bin/systemctl # with the external backend, we do not have the special snapd snap with # the first-boot run mode tweaks as created from $TESTLIB/prepare.sh's |
