From 0dc4f1b0910d7b96ea9743493db7fd2d841164da Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Jul 09 2021 17:06:24 +0000 Subject: core: do not serialize mounts and automounts for switch-root When e.g. tmp.mount is present in the initrd, and we serialize it, switch root, and deserialize, the new systemd is confused because it thinks /tmp is mounted. In general, it doesn't make sense to serialize anything that refers to paths in the old root file system. This fixes two errors for me: 1. tmp.mount was not mounted properly before local-fs.target. It would be mounted at some point (I guess when we re-read /proc/self/mountinfo for some other reason). In effect systemd-tmpfiles-setup.service would see one fs, and some other units started later a different one. In particular gdm.service would fail because the pre-created /tmp/.X11-unix with proper permissions would not exist at the time it was started. 2. # systemd[1]: proc-sys-fs-binfmt_misc.automount: Got hangup/error on autofs pipe from kernel. Likely our automount point has been unmounted by someone or something else? # systemd[1]: proc-sys-fs-binfmt_misc.automount: Failed with result 'unmounted'. # systemd[1]: Mounting proc-sys-fs-binfmt_misc.mount... # systemd[1]: Mounted proc-sys-fs-binfmt_misc.mount. # systemd[1]: Starting systemd-binfmt.service... # systemd[1]: Finished systemd-binfmt.service. # systemd[1]: proc-sys-fs-binfmt_misc.automount: Path /proc/sys/fs/binfmt_misc is already a mount point, refusing start. # systemd[1]: Failed to set up automount proc-sys-fs-binfmt_misc.automount. # systemd[1]: proc-sys-fs-binfmt_misc.automount: Path /proc/sys/fs/binfmt_misc is already a mount point, refusing start. # systemd[1]: Failed to set up automount proc-sys-fs-binfmt_misc.automount. # systemd[1]: proc-sys-fs-binfmt_misc.automount: Path /proc/sys/fs/binfmt_misc is already a mount point, refusing start. # systemd[1]: Failed to set up automount proc-sys-fs-binfmt_misc.automount. 
# systemd[1]: Stopping systemd-binfmt.service... # systemd[1]: systemd-binfmt.service: Deactivated successfully. # systemd[1]: Stopped systemd-binfmt.service. I couldn't understand the error here, but in retrospect the first line is entirely correct: "someone or something else" was the old systemd unmounting the old root. (cherry picked from commit 755021d43448011ef169f20ec3a08d4e92c824af) --- diff --git a/src/core/automount.c b/src/core/automount.c index f0fa5c8..c74af3c 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -1087,6 +1087,7 @@ const UnitVTable automount_vtable = { .can_transient = true, .can_fail = true, .can_trigger = true, + .exclude_from_switch_root_serialization = true, .init = automount_init, .load = automount_load, diff --git a/src/core/manager.c b/src/core/manager.c index 688e688..a5f3123 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3306,11 +3306,7 @@ int manager_serialize( if (u->id != t) continue; - /* Start marker */ - fputs(u->id, f); - fputc('\n', f); - - r = unit_serialize(u, f, fds, !switching_root); + r = unit_serialize(u, f, fds, switching_root); if (r < 0) return r; } diff --git a/src/core/mount.c b/src/core/mount.c index 5a757c4..5c48ead 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -2153,6 +2153,7 @@ const UnitVTable mount_vtable = { .can_transient = true, .can_fail = true, + .exclude_from_switch_root_serialization = true, .init = mount_init, .load = mount_load, diff --git a/src/core/unit-serialize.c b/src/core/unit-serialize.c index 3f09924..28d585d 100644 --- a/src/core/unit-serialize.c +++ b/src/core/unit-serialize.c @@ -88,13 +88,25 @@ static const char *const io_accounting_metric_field_last[_CGROUP_IO_ACCOUNTING_M [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-last", }; -int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { +int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool switching_root) { int r; assert(u); assert(f); assert(fds); + if 
(switching_root && UNIT_VTABLE(u)->exclude_from_switch_root_serialization) { + /* In the new root, paths for mounts and automounts will be different, so it doesn't make + * much sense to serialize things. API file systems will be moved to the new root, but we + * don't have mount units for those. */ + log_unit_debug(u, "not serializing before switch-root"); + return 0; + } + + /* Start marker */ + fputs(u->id, f); + fputc('\n', f); + if (unit_can_serialize(u)) { r = UNIT_VTABLE(u)->serialize(u, f, fds); if (r < 0) @@ -172,7 +184,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { (void) serialize_item_format(f, ip_accounting_metric_field[m], "%" PRIu64, v); } - if (serialize_jobs) { + if (!switching_root) { if (u->job) { fputs("job\n", f); job_serialize(u->job, f); diff --git a/src/core/unit.h b/src/core/unit.h index 264431d..a34d260 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -636,6 +636,9 @@ typedef struct UnitVTable { /* True if units of this type shall be startable only once and then never again */ bool once_only:1; + /* Do not serialize this unit when preparing for root switch */ + bool exclude_from_switch_root_serialization; + /* True if queued jobs of this type should be GC'ed if no other job needs them anymore */ bool gc_jobs:1;