patches and low-level development discussion
 help / color / mirror / code / Atom feed
From: Demi Marie Obenour <demiobenour@gmail.com>
To: Alyssa Ross <hi@alyssa.is>, devel@spectrum-os.org
Subject: Re: [PATCH v2 5/7] tools/vm-set-persist.c: init
Date: Sat, 13 Dec 2025 23:52:18 -0500	[thread overview]
Message-ID: <219eac4a-133a-4d8c-b289-2da42274ce6e@gmail.com> (raw)
In-Reply-To: <20251214014229.775825-10-hi@alyssa.is>


[-- Attachment #1.1.1: Type: text/plain, Size: 8813 bytes --]

On 12/13/25 20:42, Alyssa Ross wrote:
> This allows the disk-backed directory of a running VM to be made
> persistent, with a user-provided name.  This is done by renaming the
> directory to have a "persist." prefix rather than the "tmp." one the
> cleaner will look for.  Since the VM's virtiofsd accesses the
> directory via a bind mount, this rename will be unnoticeable to the
> guest.
> 
> musl has quite a bit of catching up to do with the APIs used here,
> which requires the use of a lot of raw syscalls.  This even applies to
> some syscalls musl has wrappers for, like mkdirat(2), because musl's
> mkdirat() comes from <sys/stat.h>, which defines a struct statx that's
> missing the stx_mnt_id member we need.  There doesn't even seem to be
> a SYS_statmount, so we use __NR_ constants throughout for consistency.
> 
> Signed-off-by: Alyssa Ross <hi@alyssa.is>
> ---
> v2: new this round
> 
>  tools/default.nix      |   1 +
>  tools/meson.build      |   4 +
>  tools/vm-set-persist.c | 179 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 184 insertions(+)
>  create mode 100644 tools/vm-set-persist.c
> 
> diff --git a/tools/default.nix b/tools/default.nix
> index 56f41cd9..f094594f 100644
> --- a/tools/default.nix
> +++ b/tools/default.nix
> @@ -78,6 +78,7 @@ stdenv.mkDerivation (finalAttrs: {
>        ./start-vmm
>        ./subprojects
>        ./updates-dir-check.c
> +      ./vm-set-persist.c
>      ] ++ lib.optionals driverSupport [
>        ./xdp-forwarder
>      ]));
> diff --git a/tools/meson.build b/tools/meson.build
> index 666483b3..06aa24d7 100644
> --- a/tools/meson.build
> +++ b/tools/meson.build
> @@ -37,6 +37,10 @@ if get_option('host')
>    executable('updates-dir-check', 'updates-dir-check.c',
>      c_args : '-D_GNU_SOURCE',
>      install: true)
> +
> +  executable('vm-set-persist', 'vm-set-persist.c',
> +    c_args : '-D_GNU_SOURCE',
> +    install: true)
>  endif
>  
>  if get_option('build')
> diff --git a/tools/vm-set-persist.c b/tools/vm-set-persist.c
> new file mode 100644
> index 00000000..ac759504
> --- /dev/null
> +++ b/tools/vm-set-persist.c
> @@ -0,0 +1,179 @@
> +// SPDX-FileCopyrightText: 2025 Alyssa Ross <hi@alyssa.is>
> +// SPDX-License-Identifier: EUPL-1.2+
> +
> +#include <err.h>
> +#include <fcntl.h>
> +#include <libgen.h>
> +#include <unistd.h>
> +#include <sched.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <string.h>
> +
> +// No <sys/stat.h> until musl declares stx_mnt_id.
> +#include <sys/syscall.h>
> +
> +#include <linux/fs.h>
> +#include <linux/mount.h>
> +#include <linux/openat2.h>
> +#include <linux/stat.h>
> +#include <linux/unistd.h>
> +
> +// Including trailing NUL bytes.
> +static const int MNT_ROOT_MAX_LEN = 43;
> +static const int SOURCE_MAX_LEN = 28;
> +
> +static void set_mount_namespace(const char vm_id[static 1])
> +{
> +	char ns_path[28];
> +	int r = snprintf(ns_path, sizeof ns_path,
> +	                 "/run/vm/by-id/%s/ns/mnt", vm_id);
> +
> +	if (r == -1)
> +		err(EXIT_FAILURE, "snprintf");
> +	if ((size_t)r >= sizeof ns_path)
> +		errx(EXIT_FAILURE, "VM ID unexpectedly long");
> +
> +	if ((r = open(ns_path, O_RDONLY | O_CLOEXEC)) == -1)
> +		err(EXIT_FAILURE, "open");
> +	if (setns(r, CLONE_NEWNS) == -1)
> +		err(EXIT_FAILURE, "setns");
> +	close(r);
> +}
> +
> +static void do_statx(const char path[static 1],
> +                     mode_t mode[static 1], uint64_t mnt_id[static 1])
> +{
> +	struct statx stx;
> +
> +	if (syscall(__NR_statx, AT_FDCWD, path, AT_SYMLINK_NOFOLLOW,
> +	            STATX_MODE | STATX_MNT_ID_UNIQUE, &stx) == -1)
> +		err(EXIT_FAILURE, "statx");

Here (and below), I recommend using wrapper functions around syscalls
instead of raw syscalls.  This makes it much easier to check the
types used against the manpage.

This is also missing casts to long.

> +	if (!(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT)) {
> +		if (stx.stx_attributes_mask & STATX_ATTR_MOUNT_ROOT)
> +			errx(EXIT_FAILURE,
> +			     "VM disk-backed directory not mounted");
> +
> +		errx(EXIT_FAILURE, "statx didn't return STATX_ATTR_MOUNT_ROOT");
> +	}
> +
> +	if (!(stx.stx_mask & STATX_MNT_ID_UNIQUE))
> +		errx(EXIT_FAILURE, "statx didn't return STATX_MNT_ID_UNIQUE");
> +	if (!(stx.stx_mask & STATX_MODE))
> +		errx(EXIT_FAILURE, "statx didn't return STATX_MODE");
> +
> +	*mode = stx.stx_mode;
> +	*mnt_id = stx.stx_mnt_id;
> +}
> +
> +static int do_mount(const char source[static 1])
> +{
> +	int mnt, fs = syscall(__NR_fsopen, "btrfs", FSOPEN_CLOEXEC);
> +	if (fs == -1)
> +		err(EXIT_FAILURE, "fsopen");
> +	if (syscall(__NR_fsconfig, fs, FSCONFIG_SET_STRING,
> +	            "source", source, 0) == -1)

This might be too paranoid, but if possible I would use openat2()
with RESOLVE_NO_SYMLINKS to get a file descriptor to the source,
and then use /proc/thread-self/fd/FD_NUMBER here.

> +		err(EXIT_FAILURE, "FSCONFIG_SET_STRING source");
> +	if (syscall(__NR_fsconfig, fs, FSCONFIG_SET_FLAG,
> +	            "rw", nullptr, 0) == -1)
> +		err(EXIT_FAILURE, "FSCONFIG_SET_FLAG rw");
> +	if (syscall(__NR_fsconfig, fs, FSCONFIG_CMD_CREATE,
> +	            nullptr, nullptr, 0) == -1)
> +		err(EXIT_FAILURE, "FSCONFIG_CMD_CREATE");
> +	if ((mnt = syscall(__NR_fsmount, fs, FSMOUNT_CLOEXEC,
> +	                   MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOSYMFOLLOW |
> +	                   MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV)) == -1)
> +		err(EXIT_FAILURE, "fsmount");
> +	close(fs);
> +	return mnt;
> +}
> +
> +static void do_statmount(uint64_t mnt_id,
> +                         char mnt_root[static MNT_ROOT_MAX_LEN],
> +                         char source[static SOURCE_MAX_LEN])
> +{
> +	int r;
> +	char sm_buf[sizeof(struct statmount) +
> +	            MNT_ROOT_MAX_LEN + SOURCE_MAX_LEN];
> +	struct statmount *sm = (struct statmount *)sm_buf;
> +	struct mnt_id_req req = {
> +		.size = sizeof req,
> +		.mnt_id = mnt_id,
> +		.param = STATMOUNT_MNT_ROOT | STATMOUNT_SB_SOURCE,
> +	};
> +
> +	if (syscall(__NR_statmount, &req, sm, sizeof sm_buf, 0) == -1)
> +		err(EXIT_FAILURE, "statmount");
> +
> +	r = snprintf(mnt_root, MNT_ROOT_MAX_LEN, "%s", sm->str + sm->mnt_root);
> +	if (r == -1)
> +		err(EXIT_FAILURE, "snprintf");
> +	if (r >= MNT_ROOT_MAX_LEN)
> +		errx(EXIT_FAILURE, "unexpectedly long mnt_root");
> +
> +	r = snprintf(source, SOURCE_MAX_LEN, "%s", sm->str + sm->sb_source);
> +	if (r == -1)
> +		err(EXIT_FAILURE, "snprintf");
> +	if (r >= SOURCE_MAX_LEN)
> +		errx(EXIT_FAILURE, "unexpectedly long sb_source");
> +}

Here and elsewhere, I suggest a wrapper around snprintf().

> +static void do_rename(int mnt, const char dir_name[static 1],
> +                      const char old_name[static 1],
> +                      const char new_name[static 1], mode_t mode)
> +{
> +	struct open_how how = {
> +		.flags = O_PATH | O_CLOEXEC | O_DIRECTORY | O_NOFOLLOW,
> +		.resolve = RESOLVE_NO_MAGICLINKS | RESOLVE_IN_ROOT |
> +		           RESOLVE_NO_SYMLINKS | RESOLVE_NO_XDEV,
> +	};
> +	int dir = syscall(__NR_openat2, mnt, dir_name, &how, sizeof how);
> +	if (dir == -1)
> +		err(EXIT_FAILURE, "openat2");
> +
> +	if (syscall(__NR_mkdirat, dir, new_name, mode) == -1)
> +		err(EXIT_FAILURE, "mkdirat");
> +	if (syscall(__NR_renameat2, dir, old_name, dir, new_name,
> +	            RENAME_EXCHANGE) == -1)
> +		err(EXIT_FAILURE, "renameat2");
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	int mnt;
> +	mode_t mode;
> +	uint64_t mnt_id;
> +	char *disk_path, *dir_name, *old_name, *new_name,
> +	     mnt_root[MNT_ROOT_MAX_LEN], source[SOURCE_MAX_LEN];
> +
> +	if (argc != 3) {
> +		fprintf(stderr, "Usage: vm-set-persist ID INSTANCE\n");
> +		exit(EXIT_FAILURE);
> +	}
> +
> +	if (strchr(argv[1], '/'))
> +		errx(EXIT_FAILURE, "invalid VM ID");

I'd check for ".", "..", empty string, and NAME_MAX (255) as well.

> +	if (strchr(argv[2], '/'))
> +		errx(EXIT_FAILURE, "invalid persistent directory name");> +	if (asprintf(&disk_path, "/run/fs/%s/disk", argv[1]) == -1)
> +		err(EXIT_FAILURE, "asprintf");
> +	if (asprintf(&new_name, "persist.%s", argv[2]) == -1)
> +		err(EXIT_FAILURE, "asprintf");

I'd check that this doesn't go beyond NAME_MAX.

> +	set_mount_namespace(argv[1]);
> +
> +	do_statx(disk_path, &mode, &mnt_id);
> +	do_statmount(mnt_id, mnt_root, source);
> +
> +	if (!(dir_name = strdup(mnt_root)))
> +		err(EXIT_FAILURE, "strdup");
> +	dir_name = dirname(dir_name);
> +	old_name = basename(mnt_root);
> +
> +	mnt = do_mount(source);
> +
> +	do_rename(mnt, dir_name, old_name, new_name, mode);
> +}
-- 
Sincerely,
Demi Marie Obenour (she/her/hers)

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 7253 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2025-12-14  4:52 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-14  1:42 [PATCH v2 1/7] host/rootfs: make fs root directories shared Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 2/7] host/rootfs: give VMs a disk-backed directory Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 3/7] host/rootfs: clean up obsolete tmp dirs on VM exit Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 4/7] host/rootfs: clean up obsolete tmp dirs on mount Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 5/7] tools/vm-set-persist.c: init Alyssa Ross
2025-12-14  4:52   ` Demi Marie Obenour [this message]
2025-12-14 10:49     ` Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 6/7] host/rootfs: run transient VMs with persistence Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14  1:42 ` [PATCH v2 7/7] Documentation: document persistence Alyssa Ross
2025-12-14 12:55   ` Alyssa Ross
2025-12-14 12:55 ` [PATCH v2 1/7] host/rootfs: make fs root directories shared Alyssa Ross

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=219eac4a-133a-4d8c-b289-2da42274ce6e@gmail.com \
    --to=demiobenour@gmail.com \
    --cc=devel@spectrum-os.org \
    --cc=hi@alyssa.is \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://spectrum-os.org/git/crosvm
	https://spectrum-os.org/git/doc
	https://spectrum-os.org/git/mktuntap
	https://spectrum-os.org/git/nixpkgs
	https://spectrum-os.org/git/spectrum
	https://spectrum-os.org/git/ucspi-vsock
	https://spectrum-os.org/git/www

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).