8 files changed, 386 insertions, 4 deletions
diff --git a/tests/core/snapd-refresh-vs-services-reboots/task.yaml b/tests/core/snapd-refresh-vs-services-reboots/task.yaml
new file mode 100644
index 0000000000..55b3f6c42e
--- /dev/null
+++ b/tests/core/snapd-refresh-vs-services-reboots/task.yaml
@@ -0,0 +1,136 @@
+summary: Check that refreshing snapd in the worst case reboots if things go sideways
+
+# TODO: move this test to tests/regression/lp-1924805 ?
+
+systems: [ubuntu-core-18-*, ubuntu-core-20-*]
+
+environment:
+ # the test needs to start from 2.49.2 to reproduce the bug and demonstrate the
+ # worst case behavior still works
+ SNAPD_2_49_2_X86: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11588.snap
+ SNAPD_2_49_2_ARM64: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11584.snap
+ SNAPD_2_49_2_ARMHF: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11586.snap
+
+prepare: |
+ snap install http --devmode # devmode so it can save to any dir
+
+ # save the version of snapd from the PR to refresh to later
+ INITIAL_REV=$(snap list snapd | tail -n +2 | awk '{print $3}')
+ cp "/var/lib/snapd/snaps/snapd_$INITIAL_REV.snap" snapd-pr.snap
+
+ # download and install snapd 2.49.2
+ if os.query is-pc-amd64; then
+ http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_X86"
+ elif os.query is-arm64; then
+ http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_ARM64"
+ elif os.query is-armhf; then
+ http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_ARMHF"
+ else
+ echo "architecture not supported for this variant"
+ exit 0
+ fi
+ snap install --dangerous snapd_2.49.2.snap
+ tests.cleanup defer snap revert snapd --revision="$INITIAL_REV"
+ snap version | MATCH 2.49.2
+
+restore: |
+ # We need special restore code here for the snapd snap because of the multiple
+ # variants of this test and existing subtle bugs in our restore code. The issue
+ # is that when we install a new revision of the snapd snap dangerously without
+ # store assertions, then at the
+ # end of the test, to ensure that other tests use the snapd version that we
+ # started with, we revert the snapd snap to the original revision without
+ # triggering a garbage collection of the revisions we reverted away from (i.e.
+ # the new revisions we installed as part of this test execution). This is 
+ # problematic because it leaves those old revisions mounted at /snap/snapd/x2
+ # for example and now at the end of the test the active revision of snapd is 
+ # x1. Then during the next test execution that tries to install a dangerous 
+ # local revision of snapd, it will have lost any reference to the previous x2
+ # revision (since we clear state.json at the end of the test execution in 
+ # reset.sh), and now snapd is trying to install and make active x2 again, and
+ # it will copy all the right files, but when it comes time to mount the new x2
+ # revision of snapd, there will already be an existing active mount unit for 
+ # the previous revision at /snap/snapd/x2, and so the bits of code that try
+ # to mount the .snap file there will effectively just silently fail since there
+ # is already a snapd snap mounted there; it is just the wrong one mounted
+ # there.
+
+ # We remedy this here, temporarily, by first manually executing all cleanups
+ # that were deferred, since one of those deferred cleanups is likely the 
+ # revert to the previous revision of the snapd snap, and then we manually 
+ # remove all disabled revisions of the snapd snap - this manual removal will
+ # in fact unmount the mount units for /snap/snapd/x2 for example, avoiding the
+ # bug.
+
+ tests.cleanup restore
+ for rev in $(snap list snapd --all | grep disabled | awk '{print $3}'); do
+ snap remove snapd --revision="$rev"
+ done
+
+execute: |
+ if ! os.query is-pc-amd64 && ! os.query is-arm; then
+ echo "architecture not supported for this variant"
+ exit 0
+ fi
+
+ if [ "$SPREAD_REBOOT" = "0" ]; then
+ # install the special service snap which will fail to startup every other time
+ # it is started
+ snap pack test-snapd-svc-flip-flop --filename=app.snap
+
+ # install the snap so it is running
+ snap install --dangerous app.snap
+
+ # ensure that the service is active and has a running PID
+ snap services | MATCH 'test-snapd-svc-flip-flop\.svc1\s+enabled\s+active.*'
+
+ # ensure that the service was installed with Requires= which triggers the bug
+ MATCH Requires=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-svc-flip-flop.svc1.service
+
+ # mock the reboot command so that when we try to refresh snapd and it fails,
+ # we can observe this and reboot with spread instead
+
+ # TODO: make this a common helper for mocking expected snapd reboots, this
+ # code is shared with uc20-recovery.sh right now
+
+ # save the original systemctl command since we essentially need to mock it
+ cp /bin/systemctl /tmp/orig-systemctl
+
+ mount -o bind "$TESTSLIB/mock-shutdown" /bin/systemctl
+ tests.cleanup defer umount /bin/systemctl
+
+ # now refresh to the new version of snapd - this will trigger a reboot of 
+ # the system
+ snap install --dangerous snapd-pr.snap 2>&1 | MATCH "snapd is about to reboot the system"
+
+ # snapd schedules a slow timeout and an immediate one, however it is
+ # scheduled asynchronously, try to keep the check simple
+ # shellcheck disable=SC2016
+ retry -n 30 --wait 1 sh -c 'test "$(wc -l < /tmp/mock-shutdown.calls)" = "2"'
+ # a reboot in 10 minutes should have been scheduled
+ MATCH -- '-r \+10' < /tmp/mock-shutdown.calls
+ # and an immediate reboot should have been scheduled
+ MATCH -- '-r \+0' < /tmp/mock-shutdown.calls
+
+ # check that before shutting down, snapd rewrote the unit service file to
+ # contain Wants= instead of Requires=
+ MATCH Wants=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-svc-flip-flop.svc1.service
+
+ # and the service (app svc1 of the snap) should not be running anymore
+ not systemctl is-active snap.test-snapd-svc-flip-flop.svc1.service
+
+ # restore shutdown so that spread can reboot the host
+ tests.cleanup pop
+
+ REBOOT
+ fi
+
+ # we rebooted after installing the new snapd as expected, make sure that the
+ # service is active and that the snap change finishes successfully 
+
+ snap watch --last=install
+ snap changes snapd | NOMATCH Error 
+ snap changes snapd | NOMATCH Undone
+ snap changes snapd | NOMATCH Hold
+
+ snap services | MATCH 'test-snapd-svc-flip-flop\.svc1\s+enabled\s+active.*'
diff --git a/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/bin/svc.sh b/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/bin/svc.sh
new file mode 100755
index 0000000000..ebc18ac081
--- /dev/null
+++ b/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/bin/svc.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -ex
+
+# flip-flop service: start attempts alternate between succeeding and hanging
+# if the marker file exists, consume it and never fork, just sleep forever
+if [ -f "$SNAP_DATA/prevent-start" ]; then
+ rm -rf "$SNAP_DATA/prevent-start"
+ sleep infinity
+fi
+
+# otherwise fork a process, leave the marker for the next start, and then exit
+sleep infinity &
+touch "$SNAP_DATA/prevent-start"
+
+exit 0
diff --git a/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/meta/snap.yaml b/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/meta/snap.yaml
new file mode 100644
index 0000000000..c476bba4ec
--- /dev/null
+++ b/tests/core/snapd-refresh-vs-services-reboots/test-snapd-svc-flip-flop/meta/snap.yaml
@@ -0,0 +1,9 @@
+name: test-snapd-svc-flip-flop
+version: "0.1"
+
+apps:
+ svc1:
+ command: bin/svc.sh
+ daemon: forking
+ # short start-timeout so the test finishes quickly
+ start-timeout: 5s
diff --git a/tests/core/snapd-refresh-vs-services/task.yaml b/tests/core/snapd-refresh-vs-services/task.yaml
new file mode 100644
index 0000000000..05d5af6585
--- /dev/null
+++ b/tests/core/snapd-refresh-vs-services/task.yaml
@@ -0,0 +1,212 @@
+summary: Check that refreshing snapd does not interfere with snap services
+
+# TODO: we should also run it on classic later
+
+systems: [ubuntu-core-18-*, ubuntu-core-20-*]
+
+environment:
+ SNAPD_VERSION_UNDER_TEST/start_w_pr: pr
+ SNAPD_VERSION_UNDER_TEST/start_w_stable: stable
+ SNAPD_VERSION_UNDER_TEST/start_w_2_49_2: "2.49.2"
+
+ # links to specific snapd versions
+
+ SNAPD_2_49_1_X86: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.1_11402.snap
+ SNAPD_2_49_1_ARM64: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.1_11408.snap
+ SNAPD_2_49_1_ARMHF: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.1_11410.snap
+
+ SNAPD_2_49_2_X86: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11588.snap
+ SNAPD_2_49_2_ARM64: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11584.snap
+ SNAPD_2_49_2_ARMHF: https://storage.googleapis.com/snapd-spread-tests/snaps/snapd_2.49.2_11586.snap
+
+prepare: |
+ # install http snap to download files, jq + remarshal to simplify the check if
+ # stable == 2.49.2 so we can skip that case automatically until a new version
+ # is released to stable
+ snap install http --devmode # devmode so it can save to any dir
+ snap install jq remarshal
+ # save the current version of snapd for later
+ INITIAL_REV=$(snap list snapd | tail -n +2 | awk '{print $3}')
+ cp "/var/lib/snapd/snaps/snapd_$INITIAL_REV.snap" snapd-pr.snap
+
+ snap set system experimental.parallel-instances=true
+ tests.cleanup defer snap unset system experimental.parallel-instances
+
+ # keep around all the snapd snap revisions we will use in the test so that we
+ # can always easily revert back to the one at the end of the test
+ snap set system refresh.retain=5
+ tests.cleanup defer snap unset system refresh.retain
+
+restore: |
+ # We need special restore code here for the snapd snap because of the multiple
+ # variants of this test and existing subtle bugs in our restore code. The issue
+ # is that when we install a new revision of the snapd snap dangerously without
+ # store assertions, then at the
+ # end of the test, to ensure that other tests use the snapd version that we
+ # started with, we revert the snapd snap to the original revision without
+ # triggering a garbage collection of the revisions we reverted away from (i.e.
+ # the new revisions we installed as part of this test execution). This is 
+ # problematic because it leaves those old revisions mounted at /snap/snapd/x2
+ # for example and now at the end of the test the active revision of snapd is 
+ # x1. Then during the next test execution that tries to install a dangerous 
+ # local revision of snapd, it will have lost any reference to the previous x2
+ # revision (since we clear state.json at the end of the test execution in 
+ # reset.sh), and now snapd is trying to install and make active x2 again, and
+ # it will copy all the right files, but when it comes time to mount the new x2
+ # revision of snapd, there will already be an existing active mount unit for 
+ # the previous revision at /snap/snapd/x2, and so the bits of code that try
+ # to mount the .snap file there will effectively just silently fail since there
+ # is already a snapd snap mounted there; it is just the wrong one mounted
+ # there.
+
+ # We remedy this here, temporarily, by first manually executing all cleanups
+ # that were deferred, since one of those deferred cleanups is likely the 
+ # revert to the previous revision of the snapd snap, and then we manually 
+ # remove all disabled revisions of the snapd snap - this manual removal will
+ # in fact unmount the mount units for /snap/snapd/x2 for example, avoiding the
+ # bug.
+
+ tests.cleanup restore
+ for rev in $(snap list snapd --all | grep disabled | awk '{print $3}'); do
+ snap remove snapd --revision="$rev"
+ done
+
+execute: |
+ # check if snapd 2.49.2 is the current latest/stable release as it simplifies
+ # some of the logic below
+ if snap info snapd | yaml2json | jq -r '.channels."latest/stable"' | grep -q -Po '2.49.2\s+'; then
+ # skip the stable variant of the test
+ if [ "${SNAPD_VERSION_UNDER_TEST}" = "stable" ]; then
+ echo "Skipping duplicated test case"
+ exit 0
+ fi
+ fi
+
+ if ! os.query is-pc-amd64 && ! os.query is-arm; then
+ echo "architecture not supported for this variant"
+ exit 0
+ fi
+
+ echo "Ensure that the system is fully seeded"
+ snap changes | MATCH "Done.*Initialize system state"
+
+ INITIAL_REV=$(snap list snapd | tail -n +2 | awk '{print $3}')
+
+ # first thing is to install snapd 2.49.1 before the Requires= change was 
+ # introduced so we can install a snap service that will not have Requires= in
+ # it
+ if os.query is-pc-amd64; then
+ http --quiet --download --output snapd_2.49.1.snap GET "$SNAPD_2_49_1_X86"
+ elif os.query is-arm64; then
+ http --quiet --download --output snapd_2.49.1.snap GET "$SNAPD_2_49_1_ARM64"
+ elif os.query is-armhf; then
+ http --quiet --download --output snapd_2.49.1.snap GET "$SNAPD_2_49_1_ARMHF"
+ fi
+
+ snap install --dangerous snapd_2.49.1.snap
+ snap version | MATCH 2.49.1
+
+ # always go back to the original revision from the pr at the end of the test
+ tests.cleanup defer snap revert snapd --revision="$INITIAL_REV"
+
+ echo "Install a service from snapd 2.49.1 to have one without Requires= in it"
+ "$TESTSTOOLS"/snaps-state install-local test-snapd-simple-service
+
+ # check that it is initially active
+ snap services|MATCH ".*test-snapd-simple-service\s*enabled\s*active.*"
+
+ # check that it doesn't have any dependencies on usr-lib-snapd.mount at the 
+ # start
+ NOMATCH Requires=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service.test-snapd-simple-service.service
+ NOMATCH Wants=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service.test-snapd-simple-service.service
+
+ # now refresh to the variant of the test
+ if [ "${SNAPD_VERSION_UNDER_TEST}" = "stable" ]; then
+ echo "Refreshing snapd to stable"
+ snap refresh --amend --channel=latest/stable snapd
+ elif [ "${SNAPD_VERSION_UNDER_TEST}" = "2.49.2" ]; then
+ # download and install snapd 2.49.2
+ if os.query is-pc-amd64; then
+ http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_X86"
+ elif os.query is-arm64; then
+ http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_ARM64"
+ elif os.query is-armhf; then
+ http --quiet --download --output snapd_2.49.2.snap GET "$SNAPD_2_49_2_ARMHF"
+ fi
+
+ echo "Refreshing snapd to 2.49.2"
+ snap install --dangerous snapd_2.49.2.snap
+ snap version | MATCH 2.49.2
+
+ elif [ "${SNAPD_VERSION_UNDER_TEST}" = "pr" ]; then
+ # refresh back to the version we originally had from before the test 
+ # started
+ echo "Refreshing snapd to version from the pr"
+ snap install --dangerous snapd-pr.snap
+ fi
+
+ # now install another service that will either get Requires= or Wants= for
+ # usr-lib-snapd.mount, depending on the variant of the test
+ "$TESTSTOOLS"/snaps-state install-local-as test-snapd-simple-service test-snapd-simple-service_alt
+
+ # check that it is still initially active
+ snap services|MATCH ".*test-snapd-simple-service_alt.test-snapd-simple-service\s*enabled\s*active.*"
+
+ # if we are not running with 2.49.2, then the second service should have been
+ # generated with Wants=, but if we are doing 2.49.2 then the second service 
+ # should have Requires= in it
+ if [ "${SNAPD_VERSION_UNDER_TEST}" = "2.49.2" ];then 
+ MATCH Requires=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service_alt.test-snapd-simple-service.service
+ else
+ MATCH Wants=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service_alt.test-snapd-simple-service.service
+ fi
+
+ # now repack current snapd to refresh to it again and observe the results
+ unsquashfs -d ./snapd-unpacked snapd-pr.snap
+ snap pack --filename=snapd_repacked.snap snapd-unpacked
+
+ # save the PID's of the services
+ systemctl show -p MainPID snap.test-snapd-simple-service.test-snapd-simple-service > old-main.pid
+ systemctl show -p MainPID snap.test-snapd-simple-service_alt.test-snapd-simple-service > old-main_alt.pid
+
+ echo "Refresh snapd"
+ snap install --dangerous snapd_repacked.snap
+
+ # for all variants of the test, the main.pid should be the same since it did
+ # not ever contain the Requires=
+ systemctl show -p MainPID snap.test-snapd-simple-service.test-snapd-simple-service > new-main.pid
+
+ if [ "$(cat new-main.pid)" != "$(cat old-main.pid)" ]; then
+ echo "The service without Requires= was restarted; test is broken"
+ exit 1
+ fi
+
+ # for the variants of the test that have the fix in them, that is stable and
+ # pr, the alt service (generated with Wants=) must keep its PID, i.e. it must
+ # not have been restarted by the snapd refresh
+ # for the other variant (2.49.2 only), we unfortunately ended up needing to
+ # restart the service so it should have a different PID, but it should be
+ # running again
+
+ systemctl show -p MainPID snap.test-snapd-simple-service_alt.test-snapd-simple-service > new-main_alt.pid
+ if [ "${SNAPD_VERSION_UNDER_TEST}" = "2.49.2" ]; then
+ if [ "$(cat new-main_alt.pid)" = "$(cat old-main_alt.pid)" ]; then
+ echo "Somehow the service was not killed as expected ... test is probably broken"
+ exit 1
+ fi
+ else
+ if [ "$(cat new-main_alt.pid)" != "$(cat old-main_alt.pid)" ]; then
+ echo "The service with Wants= was unexpectedly killed; test is broken"
+ exit 1
+ fi
+ fi
+
+ # in all cases both services should be active after the refresh
+ echo "Check services were kept active"
+ snap services|MATCH ".*test-snapd-simple-service\s*enabled\s*active.*"
+ snap services|MATCH ".*test-snapd-simple-service_alt.test-snapd-simple-service\s*enabled\s*active.*"
+
+ # and both services should have Wants= now
+ echo "Check services were re-written to use Wants=usr-lib-snapd.mount now"
+ MATCH Wants=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service_alt.test-snapd-simple-service.service
+ MATCH Wants=usr-lib-snapd.mount < /etc/systemd/system/snap.test-snapd-simple-service.test-snapd-simple-service.service
diff --git a/tests/lib/mock-shutdown b/tests/lib/mock-shutdown
index 8681f07f81..2e9bad0590 100755
--- a/tests/lib/mock-shutdown
+++ b/tests/lib/mock-shutdown
@@ -5,6 +5,9 @@
 # that would show up in the mock-shutdown.calls and confuse the test
 if [ "$1" = "-r" ]; then
 echo "$*" >> /tmp/mock-shutdown.calls
+ exit 0
+else
+ # not the shutdown command so we need to execute the original systemctl
+ # which needs to have been copied over to this location
+ exec /tmp/orig-systemctl "$@"
 fi
-
-exit 0
\ No newline at end of file
diff --git a/tests/regression/lp-1813963/test-snapd-simple-service/bin/service b/tests/lib/snaps/test-snapd-simple-service/bin/service
index 29754c4b97..29754c4b97 100755
--- a/tests/regression/lp-1813963/test-snapd-simple-service/bin/service
+++ b/tests/lib/snaps/test-snapd-simple-service/bin/service
diff --git a/tests/regression/lp-1813963/test-snapd-simple-service/meta/snap.yaml b/tests/lib/snaps/test-snapd-simple-service/meta/snap.yaml
index f226fcbe1f..f226fcbe1f 100644
--- a/tests/regression/lp-1813963/test-snapd-simple-service/meta/snap.yaml
+++ b/tests/lib/snaps/test-snapd-simple-service/meta/snap.yaml
diff --git a/tests/lib/uc20-recovery.sh b/tests/lib/uc20-recovery.sh
index cce8fe7a04..277d0e3ab6 100644
--- a/tests/lib/uc20-recovery.sh
+++ b/tests/lib/uc20-recovery.sh
@@ -10,9 +10,17 @@ transition_to_recover_mode(){
 HAVE_LABEL=0
 fi
 
+ # TODO: the following mocking of systemctl should be combined with the code
+ # in tests/core/snapd-refresh-vs-services-reboots into a generic shutdown
+ # helper to get better observability and fewer race conditions around
+ # snapd rebooting things from a live system under spread
+
+ # save the original systemctl command since we essentially need to mock it
+ cp /bin/systemctl /tmp/orig-systemctl
+
 # redirect shutdown command to our mock to observe calls and avoid racing
 # with spread
- mount -o bind "$TESTSLIB/mock-shutdown" /usr/sbin/shutdown
+ mount -o bind "$TESTSLIB/mock-shutdown" /bin/systemctl
 
 # reboot to recovery mode
 echo "Request rebooting into recovery mode"
@@ -32,7 +40,7 @@ transition_to_recover_mode(){
 MATCH -- '-r \+0' < /tmp/mock-shutdown.calls
 
 # restore shutdown so that spread can reboot the host
- umount /usr/sbin/shutdown
+ umount /bin/systemctl
 
 # with the external backend, we do not have the special snapd snap with
 # the first-boot run mode tweaks as created from $TESTLIB/prepare.sh's