Skip to content

Commit

Permalink
criu: Support C/R of pidfds
Browse files Browse the repository at this point in the history
Process file descriptors (pidfds) were introduced to provide a stable
handle on a process. They solve the problem of pid recycling.

For a detailed explanation, see https://lwn.net/Articles/801319/ and
http://www.corsix.org/content/what-is-a-pidfd

Before Linux 6.9, anonymous inodes were used for the implementation of
pidfds. So, we detect them in a fashion similar to other fd types that
use anonymous inodes by calling `readlink()`.
Starting with 6.9, pidfs (a file system for pidfds) was introduced.
In 6.9 `S_ISREG()` returned true for pidfds, but this changed again with
6.10.
(https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pidfs.c?h=v6.11-rc2#n285)
After this change, pidfs inodes have no file type in st_mode in
userspace.
We use `PID_FS_MAGIC` to detect pidfds on kernels >= 6.9.
Hence, the check for pidfds occurs before the check for regular files.

For pidfds that refer to dead processes, we lose the pid of the process
as the Pid and NSpid fields in /proc/<pid>/fdinfo/<pidfd> change to -1.
So, we create a temporary process for each unique inode and open pidfds
that refer to this process. After all pidfds have been opened we kill
this temporary process.

This commit does not include support for pidfds that point to a specific
thread, i.e. pidfds opened with the `PIDFD_THREAD` flag.

Fixes: checkpoint-restore#2258

Signed-off-by: Bhavik Sachdev <[email protected]>
  • Loading branch information
bsach64 committed Oct 3, 2024
1 parent d22b9bd commit e6e9b3b
Show file tree
Hide file tree
Showing 12 changed files with 361 additions and 1 deletion.
1 change: 1 addition & 0 deletions criu/Makefile.crtools
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ obj-$(CONFIG_COMPAT) += vdso-compat.o
CFLAGS_REMOVE_vdso-compat.o += $(CFLAGS-ASAN) $(CFLAGS-GCOV)
obj-y += pidfd-store.o
obj-y += hugetlb.o
obj-y += pidfd.o

PROTOBUF_GEN := scripts/protobuf-gen.sh

Expand Down
3 changes: 2 additions & 1 deletion criu/cr-restore.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
#include "timens.h"
#include "bpfmap.h"
#include "apparmor.h"
#include "pidfd.h"

#include "parasite-syscall.h"
#include "files-reg.h"
Expand Down Expand Up @@ -280,7 +281,7 @@ static struct collect_image_info *cinfos_files[] = {
&unix_sk_cinfo, &fifo_cinfo, &pipe_cinfo, &nsfile_cinfo, &packet_sk_cinfo,
&netlink_sk_cinfo, &eventfd_cinfo, &epoll_cinfo, &epoll_tfd_cinfo, &signalfd_cinfo,
&tunfile_cinfo, &timerfd_cinfo, &inotify_cinfo, &inotify_mark_cinfo, &fanotify_cinfo,
&fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo,
&fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, &pidfd_cinfo
};

/* These images are required to restore namespaces */
Expand Down
17 changes: 17 additions & 0 deletions criu/files.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "kerndat.h"
#include "fdstore.h"
#include "bpfmap.h"
#include "pidfd.h"

#include "protobuf.h"
#include "util.h"
Expand Down Expand Up @@ -544,6 +545,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
ops = &signalfd_dump_ops;
else if (is_timerfd_link(link))
ops = &timerfd_dump_ops;
else if (is_pidfd_link(link))
ops = &pidfd_dump_ops;
#ifdef CONFIG_HAS_LIBBPF
else if (is_bpfmap_link(link))
ops = &bpfmap_dump_ops;
Expand All @@ -554,6 +557,11 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts,
return do_dump_gen_file(&p, lfd, ops, e);
}

if (p.fs_type == PID_FS_MAGIC) {
ops = &pidfd_dump_ops;
return do_dump_gen_file(&p, lfd, ops, e);
}

if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode) || S_ISLNK(p.stat.st_mode)) {
if (fill_fdlink(lfd, &p, &link))
return -1;
Expand Down Expand Up @@ -1778,6 +1786,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i)
case FD_TYPES__MEMFD:
ret = collect_one_file_entry(fe, fe->memfd->id, &fe->memfd->base, &memfd_cinfo);
break;
case FD_TYPES__PIDFD:
ret = collect_one_file_entry(fe, fe->pidfd->id, &fe->pidfd->base, &pidfd_cinfo);
break;
#ifdef CONFIG_HAS_LIBBPF
case FD_TYPES__BPFMAP:
ret = collect_one_file_entry(fe, fe->bpf->id, &fe->bpf->base, &bpfmap_cinfo);
Expand All @@ -1800,5 +1811,11 @@ int prepare_files(void)
{
/*
 * Initialise the lookup tables and then collect the files image.
 * Returns -1 if the dead-pidfd hash cannot be set up; otherwise returns
 * whatever collect_image() reports for files_cinfo.
 */
init_fdesc_hash();
init_sk_info_hash();

/*
 * Unlike the two initialisers above, this one can fail: a non-zero
 * return is treated as an error. It runs before collect_image() --
 * NOTE(review): presumably because collecting pidfd entries uses this
 * hash; confirm against pidfd.c.
 */
if (init_dead_pidfd_hash()) {
pr_err("Could not initialise hash map for dead pidfds\n");
return -1;
}

return collect_image(&files_cinfo);
}
1 change: 1 addition & 0 deletions criu/image-desc.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
FD_ENTRY_F(BPFMAP_FILE, "bpfmap-file", O_NOBUF),
FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF),
FD_ENTRY(APPARMOR, "apparmor"),
FD_ENTRY(PIDFD, "pidfd"),

[CR_FD_STATS] = {
.fmt = "stats-%s",
Expand Down
4 changes: 4 additions & 0 deletions criu/include/fs-magic.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,8 @@
#define OVERLAYFS_SUPER_MAGIC 0x794c7630
#endif

/*
 * Filesystem magic used to recognise pidfds that live on pidfs; it is
 * compared against the fs_type of a file descriptor being dumped.
 * The bytes 0x50 0x49 0x44 0x46 spell "PIDF" in ASCII.
 * Only defined here when no earlier header has already provided it.
 */
#ifndef PID_FS_MAGIC
#define PID_FS_MAGIC 0x50494446
#endif

#endif /* __CR_FS_MAGIC_H__ */
1 change: 1 addition & 0 deletions criu/include/image-desc.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ enum {
CR_FD_PIPES,
CR_FD_TTY_FILES,
CR_FD_MEMFD_FILE,
CR_FD_PIDFD,

CR_FD_AUTOFS,

Expand Down
1 change: 1 addition & 0 deletions criu/include/magic.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
#define BPFMAP_FILE_MAGIC 0x57506142 /* Alapayevsk */
#define BPFMAP_DATA_MAGIC 0x64324033 /* Arkhangelsk */
#define APPARMOR_MAGIC 0x59423047 /* Nikolskoye */
#define PIDFD_MAGIC 0x54435556 /* Ufa */

#define IFADDR_MAGIC RAW_IMAGE_MAGIC
#define ROUTE_MAGIC RAW_IMAGE_MAGIC
Expand Down
16 changes: 16 additions & 0 deletions criu/include/pidfd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/* Declarations for checkpoint/restore of process file descriptors (pidfds). */
#ifndef __CR_PIDFD_H__
#define __CR_PIDFD_H__

#include "files.h"
#include "pidfd.pb-c.h"

/*
 * Dump operations chosen by dump_one_file() for a descriptor that is
 * recognised as a pidfd, either through is_pidfd_link() or because its
 * fs_type equals PID_FS_MAGIC.
 */
extern const struct fdtype_ops pidfd_dump_ops;

/*
 * Image collection info for pidfd entries; listed in cinfos_files[] and
 * used by collect_one_file() for FD_TYPES__PIDFD.
 */
extern struct collect_image_info pidfd_cinfo;

/*
 * Returns non-zero when @link identifies a pidfd.
 * NOTE(review): @link is presumably the readlink() target of the fd, as
 * with the other anonymous-inode checks in dump_one_file() -- confirm.
 */
extern int is_pidfd_link(char *link);

/*
 * Sets up the hash used for pidfds that refer to dead processes.
 * Callers treat a non-zero return value as failure.
 */
extern int init_dead_pidfd_hash(void);

/*
 * Pairs a pidfd image entry with a pid.
 * NOTE(review): the role of each field is not visible in this header;
 * confirm against pidfd.c.
 */
struct pidfd_dump_info {
PidfdEntry pidfe; /* protobuf message type from pidfd.pb-c.h */
pid_t pid; /* presumably the process the pidfd refers to -- TODO confirm */
};

#endif /* __CR_PIDFD_H__ */
1 change: 1 addition & 0 deletions criu/include/protobuf-desc.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ enum {
PB_BPFMAP_FILE,
PB_BPFMAP_DATA,
PB_APPARMOR,
PB_PIDFD,

/* PB_AUTOGEN_STOP */

Expand Down
Loading

0 comments on commit e6e9b3b

Please sign in to comment.