From af0e413e03f5ac22139a7f8a4d81db75a5b0fb86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 29 May 2023 19:28:19 +0200 Subject: [PATCH 1/7] Fix dumping hugetlb-based memfd on kernels < 4.16. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4.15-based kernels don't allow F_*SEAL for memfds created with MFD_HUGETLB. Since seals are not possible in this case, fake F_GETSEALS result as if it was queried for a non-sealing-enabled memfd. Signed-off-by: Michał Mirosław --- criu/memfd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/criu/memfd.c b/criu/memfd.c index da29377034b..6a43dece60b 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -93,8 +93,17 @@ static int dump_memfd_inode(int fd, struct memfd_dump_inode *inode, const char * } mie.seals = fcntl(fd, F_GET_SEALS); - if (mie.seals == -1) - goto out; + if (mie.seals == -1) { + if (errno != EINVAL || ~mie.hugetlb_flag & MFD_HUGETLB) { + pr_perror("fcntl(F_GET_SEALS)"); + goto out; + } + /* Kernels before 4.16 don't allow MFD_HUGETLB | + * MFD_ALLOW_SEALING and return EINVAL for + * fcntl(MFD_HUGETLB-enabled fd). + */ + mie.seals = F_SEAL_SEAL; + } if (pb_write_one(img_from_set(glob_imgset, CR_FD_MEMFD_INODE), &mie, PB_MEMFD_INODE)) goto out; From 11288c968d57e001df32a17346c5d262a32d3958 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 31 May 2023 15:07:43 +0200 Subject: [PATCH 2/7] Fix mount(cgroup2) for older kernels. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux 4.15 doesn't like empty string for cgroup2 mount options. Pass NULL then to satisfy the kernel check. Log the options for easier debugging. Signed-off-by: Michał Mirosław --- criu/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index bcb7b405a37..0bf7b3818c5 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -639,8 +639,8 @@ static int open_cgroupfs(struct cg_ctl *cc) return -1; } - if (mount("none", prefix, fstype, 0, mopts) < 0) { - pr_perror("Unable to mount %s", mopts); + if (mount("none", prefix, fstype, 0, mopts[0] ? mopts : NULL) < 0) { + pr_perror("Unable to mount %s %s", fstype, mopts); rmdir(prefix); return -1; } From 9e6454f50b0f06683ac5a0875535443efb498f45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 2 Jun 2023 18:02:38 +0200 Subject: [PATCH 3/7] Restore THP_DISABLE prctl. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original commit added saving THP_DISABLED flag value, but missed restoring it. There is restoring code, but used only when --lazy_pages mode is enabled. Restore the prctl flag always. While at it, rename the `has_thp_enabled` -> `!thp_disabled` for consistency. Fixes: bbbd597b4124 (2017-06-28 "mem: add dump state of THP_DISABLED prctl") Signed-off-by: Michał Mirosław --- criu/cr-restore.c | 2 +- criu/include/restorer.h | 2 +- criu/include/rst_info.h | 2 -- criu/mem.c | 4 ---- criu/pie/restorer.c | 16 ++++++---------- 5 files changed, 8 insertions(+), 18 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 2b99a775d80..bff41dc5656 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2971,7 +2971,7 @@ static int prepare_mm(pid_t pid, struct task_restore_args *args) args->fd_exe_link = exe_fd; - args->has_thp_enabled = rsti(current)->has_thp_enabled; + args->thp_disabled = mm->has_thp_disabled && mm->thp_disabled; ret = 0; out: diff --git a/criu/include/restorer.h b/criu/include/restorer.h index bc0beb5cbb0..e232f540400 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -144,7 +144,7 @@ struct task_restore_args { struct timeval logstart; int uffd; - bool has_thp_enabled; + bool thp_disabled; /* threads restoration */ int nr_threads; /* number of threads */ diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h index d0a3db6c5d5..704b42a7273 100644 --- a/criu/include/rst_info.h +++ b/criu/include/rst_info.h @@ -73,8 +73,6 @@ struct rst_info { */ bool has_old_seccomp_filter; - bool has_thp_enabled; - struct rst_rseq *rseqe; void *breakpoint; diff --git a/criu/mem.c b/criu/mem.c index 9bf7cae971b..417e0a21de7 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -1217,8 +1217,6 @@ static int restore_priv_vma_content(struct pstree_item *t, struct page_read *pr) static int maybe_disable_thp(struct pstree_item *t, struct page_read *pr) { - MmEntry *mm = rsti(t)->mm; - /* * There is no need to disable it if the page read doesn't * have parent. In this case VMA will be empty until @@ -1241,8 +1239,6 @@ static int maybe_disable_thp(struct pstree_item *t, struct page_read *pr) pr_perror("Cannot disable THP"); return -1; } - if (!(mm->has_thp_disabled && mm->thp_disabled)) - rsti(t)->has_thp_enabled = true; return 0; } diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 1f08bc2a087..0d1360c52b0 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1635,17 +1635,13 @@ long __export_restore_task(struct task_restore_args *args) goto core_restore_end; } - if (args->uffd > -1) { - /* re-enable THP if we disabled it previously */ - if (args->has_thp_enabled) { - int ret; - ret = sys_prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0); - if (ret) { - pr_err("Cannot re-enable THP: %d\n", ret); - goto core_restore_end; - } - } + ret = sys_prctl(PR_SET_THP_DISABLE, args->thp_disabled, 0, 0, 0); + if (ret) { + pr_err("Cannot restore THP_DISABLE=%d flag: %ld\n", args->thp_disabled, ret); + goto core_restore_end; + } + if (args->uffd > -1) { pr_debug("lazy-pages: closing uffd %d\n", args->uffd); /* * All userfaultfd configuration has finished at this point. From 7ca6856be45094b417519fd8cfcb4a1a5365dfc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 2 Jun 2023 17:22:06 +0200 Subject: [PATCH 4/7] Log if prctl(SET_THP_DISABLE) doesn't work as expected. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If prctl(SET_THP_DISABLE) is not used due to bad semantics, log it for easier debugging. Signed-off-by: Michał Mirosław --- criu/kerndat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/kerndat.c b/criu/kerndat.c index bc0c7ba05de..d38e8898ef7 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -1324,6 +1324,8 @@ int kerndat_has_thp_disable(void) parse_vmflags(str, &flags, &madv, &io_pf); kdat.has_thp_disable = !(madv & (1 << MADV_NOHUGEPAGE)); + if (!kdat.has_thp_disable) + pr_warn("prctl PR_SET_THP_DISABLE sets MADV_NOHUGEPAGE"); break; } } From d3a33ca1efcfd2cf45dd972cde6e0d350482a703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Wed, 17 May 2023 21:51:59 +0200 Subject: [PATCH 5/7] zdtm: thp_disable: Output a single failure message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While at it, don't carry over stale errno to the fail() message. Signed-off-by: Michał Mirosław --- test/zdtm/static/thp_disable.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/zdtm/static/thp_disable.c b/test/zdtm/static/thp_disable.c index ab88120c2cd..58d6039f8f1 100644 --- a/test/zdtm/static/thp_disable.c +++ b/test/zdtm/static/thp_disable.c @@ -47,15 +47,14 @@ int main(int argc, char **argv) if (get_smaps_bits((unsigned long)area, &new_flags, &new_madv)) return -1; + errno = 0; if (orig_flags != new_flags) { - pr_err("Flags are changed %lx -> %lx\n", orig_flags, new_flags); - fail(); + fail("Flags changed %lx -> %lx\n", orig_flags, new_flags); return -1; } if (orig_madv != new_madv) { - pr_err("Madvs are changed %lx -> %lx\n", orig_madv, new_madv); - fail(); + fail("Madvs changed %lx -> %lx\n", orig_madv, new_madv); return -1; } From 6006cb6eaf034f5581a6f52a981ea64534fe32e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 2 Jun 2023 17:11:16 +0200 Subject: [PATCH 6/7] zdtm: thp_disable: Verify prctl(THP_DISABLE) migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław --- test/zdtm/static/thp_disable.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/zdtm/static/thp_disable.c b/test/zdtm/static/thp_disable.c index 58d6039f8f1..e3850877882 100644 --- a/test/zdtm/static/thp_disable.c +++ b/test/zdtm/static/thp_disable.c @@ -17,6 +17,7 @@ int main(int argc, char **argv) unsigned long orig_flags = 0, new_flags = 0; unsigned long orig_madv = 0, new_madv = 0; void *area; + int ret; test_init(argc, argv); @@ -35,9 +36,31 @@ int main(int argc, char **argv) return -1; } + ret = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0); + if (ret < 0) { + pr_perror("Getting THP-disabled flag failed"); + return -1; + } + if (ret != 1) { + errno = 0; + fail("prctl(GET_THP_DISABLE) returned unexpected value: %d != 1\n", ret); + return -1; + } + test_daemon(); test_waitsig(); + ret = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0); + if (ret < 0) { + pr_perror("Getting post-migration THP-disabled flag failed"); + return -1; + } + if (ret != 1) { + errno = 0; + fail("post-migration prctl(GET_THP_DISABLE) returned unexpected value: %d != 1\n", ret); + return -1; + } + if (prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0)) { pr_perror("Enabling THP failed"); return -1; From c75c017e4c22bf8e5082a0cec1d8e9df76bac9dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 2 Jun 2023 19:01:29 +0200 Subject: [PATCH 7/7] zdtm: thp_disable: Verify MADV_NOHUGEPAGE before migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a sanity check for THP_DISABLE. This discovered a broken commit in Google's kernel tree. Signed-off-by: Michał Mirosław --- test/zdtm/static/thp_disable.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/zdtm/static/thp_disable.c b/test/zdtm/static/thp_disable.c index e3850877882..eabb45650d3 100644 --- a/test/zdtm/static/thp_disable.c +++ b/test/zdtm/static/thp_disable.c @@ -47,6 +47,21 @@ int main(int argc, char **argv) return -1; } + test_msg("Fetch pre-migration flags/adv\n"); + if (get_smaps_bits((unsigned long)area, &new_flags, &new_madv)) + return -1; + + errno = 0; + if (orig_flags != new_flags) { + fail("Flags changed %lx -> %lx\n", orig_flags, new_flags); + return -1; + } + + if (orig_madv != new_madv) { + fail("Madvs changed %lx -> %lx\n", orig_madv, new_madv); + return -1; + } + test_daemon(); test_waitsig();