On 12/13/25 20:42, Alyssa Ross wrote: > This allows the disk-backed directory of a running VM to be made > persistent, with a user-provided name. This is done by renaming the > directory to have a "persist." prefix rather than the "tmp." one the > cleaner will look for. Since the VM's virtiofsd accesses the > directory via a bind mount, this rename will be unnoticeable to the > guest. > > musl has quite a bit of catching up to do with the APIs used here, > which requires the use of a lot of raw syscalls. This even applies to > some syscalls musl has wrappers for, like mkdirat(2), because musl's > mkdirat() comes from , which defines a struct statx that's > missing the stx_mnt_id member we need. There doesn't even seem to be > a SYS_statmount, so we use __NR_ constants throughout for consistency. > > Signed-off-by: Alyssa Ross > --- > v2: new this round > > tools/default.nix | 1 + > tools/meson.build | 4 + > tools/vm-set-persist.c | 179 +++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 184 insertions(+) > create mode 100644 tools/vm-set-persist.c > > diff --git a/tools/default.nix b/tools/default.nix > index 56f41cd9..f094594f 100644 > --- a/tools/default.nix > +++ b/tools/default.nix > @@ -78,6 +78,7 @@ stdenv.mkDerivation (finalAttrs: { > ./start-vmm > ./subprojects > ./updates-dir-check.c > + ./vm-set-persist.c > ] ++ lib.optionals driverSupport [ > ./xdp-forwarder > ])); > diff --git a/tools/meson.build b/tools/meson.build > index 666483b3..06aa24d7 100644 > --- a/tools/meson.build > +++ b/tools/meson.build > @@ -37,6 +37,10 @@ if get_option('host') > executable('updates-dir-check', 'updates-dir-check.c', > c_args : '-D_GNU_SOURCE', > install: true) > + > + executable('vm-set-persist', 'vm-set-persist.c', > + c_args : '-D_GNU_SOURCE', > + install: true) > endif > > if get_option('build') > diff --git a/tools/vm-set-persist.c b/tools/vm-set-persist.c > new file mode 100644 > index 00000000..ac759504 > --- /dev/null > +++ b/tools/vm-set-persist.c > @@ -0,0 +1,179 @@ > +// SPDX-FileCopyrightText: 2025 Alyssa Ross > +// SPDX-License-Identifier: EUPL-1.2+ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +// No until musl declares stx_mnt_id. > +#include > + > +#include > +#include > +#include > +#include > +#include > + > +// Including trailing NUL bytes. > +static const int MNT_ROOT_MAX_LEN = 43; > +static const int SOURCE_MAX_LEN = 28; > + > +static void set_mount_namespace(const char vm_id[static 1]) > +{ > + char ns_path[28]; > + int r = snprintf(ns_path, sizeof ns_path, > + "/run/vm/by-id/%s/ns/mnt", vm_id); > + > + if (r == -1) > + err(EXIT_FAILURE, "snprintf"); > + if ((size_t)r >= sizeof ns_path) > + errx(EXIT_FAILURE, "VM ID unexpectedly long"); > + > + if ((r = open(ns_path, O_RDONLY | O_CLOEXEC)) == -1) > + err(EXIT_FAILURE, "open"); > + if (setns(r, CLONE_NEWNS) == -1) > + err(EXIT_FAILURE, "setns"); > + close(r); > +} > + > +static void do_statx(const char path[static 1], > + mode_t mode[static 1], uint64_t mnt_id[static 1]) > +{ > + struct statx stx; > + > + if (syscall(__NR_statx, AT_FDCWD, path, AT_SYMLINK_NOFOLLOW, > + STATX_MODE | STATX_MNT_ID_UNIQUE, &stx) == -1) > + err(EXIT_FAILURE, "statx"); Here (and below), I recommend using wrapper functions around syscalls instead of raw syscalls. This makes it much easier to check the types used against the manpage. This is also missing casts to long. > + if (!(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT)) { > + if (stx.stx_attributes_mask & STATX_ATTR_MOUNT_ROOT) > + errx(EXIT_FAILURE, > + "VM disk-backed directory not mounted"); > + > + errx(EXIT_FAILURE, "statx didn't return STATX_ATTR_MOUNT_ROOT"); > + } > + > + if (!(stx.stx_mask & STATX_MNT_ID_UNIQUE)) > + errx(EXIT_FAILURE, "statx didn't return STATX_MNT_ID_UNIQUE"); > + if (!(stx.stx_mask & STATX_MODE)) > + errx(EXIT_FAILURE, "statx didn't return STATX_MODE"); > + > + *mode = stx.stx_mode; > + *mnt_id = stx.stx_mnt_id; > +} > + > +static int do_mount(const char source[static 1]) > +{ > + int mnt, fs = syscall(__NR_fsopen, "btrfs", FSOPEN_CLOEXEC); > + if (fs == -1) > + err(EXIT_FAILURE, "fsopen"); > + if (syscall(__NR_fsconfig, fs, FSCONFIG_SET_STRING, > + "source", source, 0) == -1) This might be too paranoid, but if possible I would use openat2() with RESOLVE_NO_SYMLINKS to get a file descriptor to the source, and then use /proc/thread-self/fd/FD_NUMBER here. > + err(EXIT_FAILURE, "FSCONFIG_SET_STRING source"); > + if (syscall(__NR_fsconfig, fs, FSCONFIG_SET_FLAG, > + "rw", nullptr, 0) == -1) > + err(EXIT_FAILURE, "FSCONFIG_SET_FLAG rw"); > + if (syscall(__NR_fsconfig, fs, FSCONFIG_CMD_CREATE, > + nullptr, nullptr, 0) == -1) > + err(EXIT_FAILURE, "FSCONFIG_CMD_CREATE"); > + if ((mnt = syscall(__NR_fsmount, fs, FSMOUNT_CLOEXEC, > + MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOSYMFOLLOW | > + MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV)) == -1) > + err(EXIT_FAILURE, "fsmount"); > + close(fs); > + return mnt; > +} > + > +static void do_statmount(uint64_t mnt_id, > + char mnt_root[static MNT_ROOT_MAX_LEN], > + char source[static SOURCE_MAX_LEN]) > +{ > + int r; > + char sm_buf[sizeof(struct statmount) + > + MNT_ROOT_MAX_LEN + SOURCE_MAX_LEN]; > + struct statmount *sm = (struct statmount *)sm_buf; > + struct mnt_id_req req = { > + .size = sizeof req, > + .mnt_id = mnt_id, > + .param = STATMOUNT_MNT_ROOT | STATMOUNT_SB_SOURCE, > + }; > + > + if (syscall(__NR_statmount, &req, sm, sizeof sm_buf, 0) == -1) > + err(EXIT_FAILURE, "statmount"); > + > + r = snprintf(mnt_root, MNT_ROOT_MAX_LEN, "%s", sm->str + sm->mnt_root); > + if (r == -1) > + err(EXIT_FAILURE, "snprintf"); > + if (r >= MNT_ROOT_MAX_LEN) > + errx(EXIT_FAILURE, "unexpectedly long mnt_root"); > + > + r = snprintf(source, SOURCE_MAX_LEN, "%s", sm->str + sm->sb_source); > + if (r == -1) > + err(EXIT_FAILURE, "snprintf"); > + if (r >= SOURCE_MAX_LEN) > + errx(EXIT_FAILURE, "unexpectedly long sb_source"); > +} Here and elsewhere, I suggest a wrapper around snprintf(). > +static void do_rename(int mnt, const char dir_name[static 1], > + const char old_name[static 1], > + const char new_name[static 1], mode_t mode) > +{ > + struct open_how how = { > + .flags = O_PATH | O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW, > + .resolve = RESOLVE_NO_MAGICLINKS | RESOLVE_IN_ROOT | > + RESOLVE_NO_SYMLINKS | RESOLVE_NO_XDEV, > + }; > + int dir = syscall(__NR_openat2, mnt, dir_name, &how, sizeof how); > + if (dir == -1) > + err(EXIT_FAILURE, "openat2"); > + > + if (syscall(__NR_mkdirat, dir, new_name, mode) == -1) > + err(EXIT_FAILURE, "mkdirat"); > + if (syscall(__NR_renameat2, dir, old_name, dir, new_name, > + RENAME_EXCHANGE) == -1) > + err(EXIT_FAILURE, "renameat2"); > +} > + > +int main(int argc, char *argv[]) > +{ > + int mnt; > + mode_t mode; > + uint64_t mnt_id; > + char *disk_path, *dir_name, *old_name, *new_name, > + mnt_root[MNT_ROOT_MAX_LEN], source[SOURCE_MAX_LEN]; > + > + if (argc != 3) { > + fprintf(stderr, "Usage: vm-set-persist ID INSTANCE\n"); > + exit(EXIT_FAILURE); > + } > + > + if (strchr(argv[1], '/')) > + errx(EXIT_FAILURE, "invalid VM ID"); I'd check for ".", "..", empty string, and NAME_MAX (255) as well. > + if (strchr(argv[2], '/')) > + errx(EXIT_FAILURE, "invalid persistent directory name");> + if (asprintf(&disk_path, "/run/fs/%s/disk", argv[1]) == -1) > + err(EXIT_FAILURE, "asprintf"); > + if (asprintf(&new_name, "persist.%s", argv[2]) == -1) > + err(EXIT_FAILURE, "asprintf"); I'd check that this doesn't go beyond NAME_MAX. > + set_mount_namespace(argv[1]); > + > + do_statx(disk_path, &mode, &mnt_id); > + do_statmount(mnt_id, mnt_root, source); > + > + if (!(dir_name = strdup(mnt_root))) > + err(EXIT_FAILURE, "strdup"); > + dir_name = dirname(dir_name); > + old_name = basename(mnt_root); > + > + mnt = do_mount(source); > + > + do_rename(mnt, dir_name, old_name, new_name, mode); > +} -- Sincerely, Demi Marie Obenour (she/her/hers)