patches and low-level development discussion
 help / color / mirror / code / Atom feed
From: Alyssa Ross <hi@alyssa.is>
To: devel@spectrum-os.org
Cc: Demi Marie Obenour <demiobenour@gmail.com>
Subject: [PATCH v2 5/7] tools/vm-set-persist.c: init
Date: Sun, 14 Dec 2025 02:42:32 +0100	[thread overview]
Message-ID: <20251214014229.775825-10-hi@alyssa.is> (raw)
In-Reply-To: <20251214014229.775825-2-hi@alyssa.is>

This allows the disk-backed directory of a running VM to be made
persistent, with a user-provided name.  This is done by renaming the
directory to have a "persist." prefix rather than the "tmp." one the
cleaner will look for.  Since the VM's virtiofsd accesses the
directory via a bind mount, this rename will be unnoticeable to the
guest.

musl has quite a bit of catching up to do with the APIs used here,
which requires the use of a lot of raw syscalls.  This even applies to
some syscalls musl has wrappers for, like mkdirat(2), because musl's
mkdirat() comes from <sys/stat.h>, which defines a struct statx that's
missing the stx_mnt_id member we need.  There doesn't even seem to be
a SYS_statmount, so we use __NR_ constants throughout for consistency.

Signed-off-by: Alyssa Ross <hi@alyssa.is>
---
v2: new this round

 tools/default.nix      |   1 +
 tools/meson.build      |   4 +
 tools/vm-set-persist.c | 179 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 184 insertions(+)
 create mode 100644 tools/vm-set-persist.c

diff --git a/tools/default.nix b/tools/default.nix
index 56f41cd9..f094594f 100644
--- a/tools/default.nix
+++ b/tools/default.nix
@@ -78,6 +78,7 @@ stdenv.mkDerivation (finalAttrs: {
       ./start-vmm
       ./subprojects
       ./updates-dir-check.c
+      ./vm-set-persist.c
     ] ++ lib.optionals driverSupport [
       ./xdp-forwarder
     ]));
diff --git a/tools/meson.build b/tools/meson.build
index 666483b3..06aa24d7 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -37,6 +37,10 @@ if get_option('host')
   executable('updates-dir-check', 'updates-dir-check.c',
     c_args : '-D_GNU_SOURCE',
     install: true)
+
+  executable('vm-set-persist', 'vm-set-persist.c',
+    c_args : '-D_GNU_SOURCE',
+    install: true)
 endif
 
 if get_option('build')
diff --git a/tools/vm-set-persist.c b/tools/vm-set-persist.c
new file mode 100644
index 00000000..ac759504
--- /dev/null
+++ b/tools/vm-set-persist.c
@@ -0,0 +1,179 @@
+// SPDX-FileCopyrightText: 2025 Alyssa Ross <hi@alyssa.is>
+// SPDX-License-Identifier: EUPL-1.2+
+
+#include <err.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+// No <sys/stat.h> until musl declares stx_mnt_id.
+#include <sys/syscall.h>
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/openat2.h>
+#include <linux/stat.h>
+#include <linux/unistd.h>
+
+// Including trailing NUL bytes.
+static const int MNT_ROOT_MAX_LEN = 43;
+static const int SOURCE_MAX_LEN = 28;
+
+static void set_mount_namespace(const char vm_id[static 1])
+{
+	char ns_path[28];
+	int r = snprintf(ns_path, sizeof ns_path,
+	                 "/run/vm/by-id/%s/ns/mnt", vm_id);
+
+	if (r == -1)
+		err(EXIT_FAILURE, "snprintf");
+	if ((size_t)r >= sizeof ns_path)
+		errx(EXIT_FAILURE, "VM ID unexpectedly long");
+
+	if ((r = open(ns_path, O_RDONLY | O_CLOEXEC)) == -1)
+		err(EXIT_FAILURE, "open");
+	if (setns(r, CLONE_NEWNS) == -1)
+		err(EXIT_FAILURE, "setns");
+	close(r);
+}
+
+static void do_statx(const char path[static 1],
+                     mode_t mode[static 1], uint64_t mnt_id[static 1])
+{
+	struct statx stx;
+
+	if (syscall(__NR_statx, AT_FDCWD, path, AT_SYMLINK_NOFOLLOW,
+	            STATX_MODE | STATX_MNT_ID_UNIQUE, &stx) == -1)
+		err(EXIT_FAILURE, "statx");
+
+	if (!(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT)) {
+		if (stx.stx_attributes_mask & STATX_ATTR_MOUNT_ROOT)
+			errx(EXIT_FAILURE,
+			     "VM disk-backed directory not mounted");
+
+		errx(EXIT_FAILURE, "statx didn't return STATX_ATTR_MOUNT_ROOT");
+	}
+
+	if (!(stx.stx_mask & STATX_MNT_ID_UNIQUE))
+		errx(EXIT_FAILURE, "statx didn't return STATX_MNT_ID_UNIQUE");
+	if (!(stx.stx_mask & STATX_MODE))
+		errx(EXIT_FAILURE, "statx didn't return STATX_MODE");
+
+	*mode = stx.stx_mode;
+	*mnt_id = stx.stx_mnt_id;
+}
+
+static int do_mount(const char source[static 1])
+{
+	int mnt, fs = syscall(__NR_fsopen, "btrfs", FSOPEN_CLOEXEC);
+	if (fs == -1)
+		err(EXIT_FAILURE, "fsopen");
+	if (syscall(__NR_fsconfig, fs, FSCONFIG_SET_STRING,
+	            "source", source, 0) == -1)
+		err(EXIT_FAILURE, "FSCONFIG_SET_STRING source");
+	if (syscall(__NR_fsconfig, fs, FSCONFIG_SET_FLAG,
+	            "rw", nullptr, 0) == -1)
+		err(EXIT_FAILURE, "FSCONFIG_SET_FLAG rw");
+	if (syscall(__NR_fsconfig, fs, FSCONFIG_CMD_CREATE,
+	            nullptr, nullptr, 0) == -1)
+		err(EXIT_FAILURE, "FSCONFIG_CMD_CREATE");
+	if ((mnt = syscall(__NR_fsmount, fs, FSMOUNT_CLOEXEC,
+	                   MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOSYMFOLLOW |
+	                   MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV)) == -1)
+		err(EXIT_FAILURE, "fsmount");
+	close(fs);
+	return mnt;
+}
+
+static void do_statmount(uint64_t mnt_id,
+                         char mnt_root[static MNT_ROOT_MAX_LEN],
+                         char source[static SOURCE_MAX_LEN])
+{
+	int r;
+	char sm_buf[sizeof(struct statmount) +
+	            MNT_ROOT_MAX_LEN + SOURCE_MAX_LEN];
+	struct statmount *sm = (struct statmount *)sm_buf;
+	struct mnt_id_req req = {
+		.size = sizeof req,
+		.mnt_id = mnt_id,
+		.param = STATMOUNT_MNT_ROOT | STATMOUNT_SB_SOURCE,
+	};
+
+	if (syscall(__NR_statmount, &req, sm, sizeof sm_buf, 0) == -1)
+		err(EXIT_FAILURE, "statmount");
+
+	r = snprintf(mnt_root, MNT_ROOT_MAX_LEN, "%s", sm->str + sm->mnt_root);
+	if (r == -1)
+		err(EXIT_FAILURE, "snprintf");
+	if (r >= MNT_ROOT_MAX_LEN)
+		errx(EXIT_FAILURE, "unexpectedly long mnt_root");
+
+	r = snprintf(source, SOURCE_MAX_LEN, "%s", sm->str + sm->sb_source);
+	if (r == -1)
+		err(EXIT_FAILURE, "snprintf");
+	if (r >= SOURCE_MAX_LEN)
+		errx(EXIT_FAILURE, "unexpectedly long sb_source");
+}
+
+static void do_rename(int mnt, const char dir_name[static 1],
+                      const char old_name[static 1],
+                      const char new_name[static 1], mode_t mode)
+{
+	struct open_how how = {
+		.flags = O_PATH | O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW,
+		.resolve = RESOLVE_NO_MAGICLINKS | RESOLVE_IN_ROOT |
+		           RESOLVE_NO_SYMLINKS | RESOLVE_NO_XDEV,
+	};
+	int dir = syscall(__NR_openat2, mnt, dir_name, &how, sizeof how);
+	if (dir == -1)
+		err(EXIT_FAILURE, "openat2");
+
+	if (syscall(__NR_mkdirat, dir, new_name, mode) == -1)
+		err(EXIT_FAILURE, "mkdirat");
+	if (syscall(__NR_renameat2, dir, old_name, dir, new_name,
+	            RENAME_EXCHANGE) == -1)
+		err(EXIT_FAILURE, "renameat2");
+}
+
+int main(int argc, char *argv[])
+{
+	int mnt;
+	mode_t mode;
+	uint64_t mnt_id;
+	char *disk_path, *dir_name, *old_name, *new_name,
+	     mnt_root[MNT_ROOT_MAX_LEN], source[SOURCE_MAX_LEN];
+
+	if (argc != 3) {
+		fprintf(stderr, "Usage: vm-set-persist ID INSTANCE\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (strchr(argv[1], '/'))
+		errx(EXIT_FAILURE, "invalid VM ID");
+	if (strchr(argv[2], '/'))
+		errx(EXIT_FAILURE, "invalid persistent directory name");
+
+	if (asprintf(&disk_path, "/run/fs/%s/disk", argv[1]) == -1)
+		err(EXIT_FAILURE, "asprintf");
+	if (asprintf(&new_name, "persist.%s", argv[2]) == -1)
+		err(EXIT_FAILURE, "asprintf");
+
+	set_mount_namespace(argv[1]);
+
+	do_statx(disk_path, &mode, &mnt_id);
+	do_statmount(mnt_id, mnt_root, source);
+
+	if (!(dir_name = strdup(mnt_root)))
+		err(EXIT_FAILURE, "strdup");
+	dir_name = dirname(dir_name);
+	old_name = basename(mnt_root);
+
+	mnt = do_mount(source);
+
+	do_rename(mnt, dir_name, old_name, new_name, mode);
+}
-- 
2.51.0


  parent reply	other threads:[~2025-12-14  1:48 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-14  1:42 [PATCH v2 1/7] host/rootfs: make fs root directories shared Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 2/7] host/rootfs: give VMs a disk-backed directory Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 3/7] host/rootfs: clean up obsolete tmp dirs on VM exit Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 4/7] host/rootfs: clean up obsolete tmp dirs on mount Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` Alyssa Ross [this message]
2025-12-14  4:52   ` [PATCH v2 5/7] tools/vm-set-persist.c: init Demi Marie Obenour
2025-12-14 10:49     ` Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 6/7] host/rootfs: run transient VMs with persistence Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 7/7] Documentation: document persistence Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14 12:55 ` [PATCH v2 1/7] host/rootfs: make fs root directories shared Alyssa Ross

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251214014229.775825-10-hi@alyssa.is \
    --to=hi@alyssa.is \
    --cc=demiobenour@gmail.com \
    --cc=devel@spectrum-os.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://spectrum-os.org/git/crosvm
	https://spectrum-os.org/git/doc
	https://spectrum-os.org/git/mktuntap
	https://spectrum-os.org/git/nixpkgs
	https://spectrum-os.org/git/spectrum
	https://spectrum-os.org/git/ucspi-vsock
	https://spectrum-os.org/git/www

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).