From fb2f7f5dcc6b37a4f39f59d9f477d3fa57d79095 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 25 Jan 2023 17:31:27 +0100 Subject: Fix auto-uid-allocation in Docker containers This didn't work because sandboxing doesn't work in Docker. However, the sandboxing check is done lazily - after clone(CLONE_NEWNS) fails, we retry with sandboxing disabled. But at that point, we've already done UID allocation under the assumption that user namespaces are enabled. So let's get rid of the "goto fallback" logic and just detect early whether user / mount namespaces are enabled. This commit also gets rid of a compatibility hack for some ancient Linux kernels (<2.13). --- src/libutil/namespaces.cc | 63 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 src/libutil/namespaces.cc (limited to 'src/libutil/namespaces.cc') diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc new file mode 100644 index 000000000..0c3c3cbdd --- /dev/null +++ b/src/libutil/namespaces.cc @@ -0,0 +1,63 @@ +#include "namespaces.hh" +#include "util.hh" + +#if __linux__ + +namespace nix { + +bool userNamespacesSupported() +{ + static bool res = [&]() -> bool + { + if (!pathExists("/proc/self/ns/user")) { + notice("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing"); + return false; + } + + Path maxUserNamespaces = "/proc/sys/user/max_user_namespaces"; + if (!pathExists(maxUserNamespaces) || + trim(readFile(maxUserNamespaces)) == "0") + { + notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/user/max_user_namespaces'"); + return false; + } + + Path procSysKernelUnprivilegedUsernsClone = "/proc/sys/kernel/unprivileged_userns_clone"; + if (pathExists(procSysKernelUnprivilegedUsernsClone) + && trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") + { + notice("user namespaces appear to be disabled; they are required for 
sandboxing; check '/proc/sys/kernel/unprivileged_userns_clone'"); + return false; + } + + Pid pid = startProcess([&]() + { + auto res = unshare(CLONE_NEWUSER); + _exit(res ? 1 : 0); + }); + + return pid.wait() == 0; + }(); + return res; +} + +bool mountNamespacesSupported() +{ + static bool res = [&]() -> bool + { + bool useUserNamespace = userNamespacesSupported(); + + Pid pid = startProcess([&]() + { + auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0)); + _exit(res ? 1 : 0); + }); + + return pid.wait() == 0; + }(); + return res; +} + +} + +#endif -- cgit v1.2.3 From bc1d9fd8b5a14334af1d0455e6b4d595cae959d5 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 27 Jan 2023 15:25:56 +0100 Subject: Check whether we can use PID namespaces In unprivileged podman containers, /proc is not fully visible (there are other filesystems mounted on subdirectories of /proc). Therefore we can't mount a new /proc in the sandbox that matches the PID namespace of the sandbox. So this commit automatically disables sandboxing if /proc is not fully visible. 
--- src/libutil/namespaces.cc | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'src/libutil/namespaces.cc') diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index 0c3c3cbdd..222f0d11b 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -1,5 +1,8 @@ #include "namespaces.hh" #include "util.hh" +#include "finally.hh" + +#include <mntent.h> #if __linux__ @@ -7,10 +10,10 @@ namespace nix { bool userNamespacesSupported() { - static bool res = [&]() -> bool + static auto res = [&]() -> bool { if (!pathExists("/proc/self/ns/user")) { - notice("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y, which is required for sandboxing"); + debug("'/proc/self/ns/user' does not exist; your kernel was likely built without CONFIG_USER_NS=y"); return false; } @@ -18,7 +21,7 @@ bool userNamespacesSupported() if (!pathExists(maxUserNamespaces) || trim(readFile(maxUserNamespaces)) == "0") { - notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/user/max_user_namespaces'"); + debug("user namespaces appear to be disabled; check '/proc/sys/user/max_user_namespaces'"); return false; } @@ -26,7 +29,7 @@ bool userNamespacesSupported() if (pathExists(procSysKernelUnprivilegedUsernsClone) && trim(readFile(procSysKernelUnprivilegedUsernsClone)) == "0") { - notice("user namespaces appear to be disabled; they are required for sandboxing; check '/proc/sys/kernel/unprivileged_userns_clone'"); + debug("user namespaces appear to be disabled; check '/proc/sys/kernel/unprivileged_userns_clone'"); return false; } @@ -43,7 +46,7 @@ bool userNamespacesSupported() bool mountNamespacesSupported() { - static bool res = [&]() -> bool + static auto res = [&]() -> bool { bool useUserNamespace = userNamespacesSupported(); @@ -58,6 +61,30 @@ bool mountNamespacesSupported() return res; } +bool pidNamespacesSupported() +{ + static auto res = [&]() ->
bool + { + /* Check whether /proc is fully visible, i.e. there are no + filesystems mounted on top of files inside /proc. If this + is not the case, then we cannot mount a new /proc inside + the sandbox that matches the sandbox's PID namespace. + See https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */ + auto fp = fopen("/proc/mounts", "r"); + if (!fp) return false; + Finally delFP = [&]() { fclose(fp); }; + + while (auto ent = getmntent(fp)) + if (hasPrefix(std::string_view(ent->mnt_dir), "/proc/")) { + debug("PID namespaces do not work because /proc is not fully visible; disabling sandboxing"); + return false; + } + + return true; + }(); + return res; +} + } #endif -- cgit v1.2.3 From d834de2894b5addc5a4a8c5088debd56a8517db1 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 27 Jan 2023 16:52:01 +0100 Subject: Fix macOS build --- src/libutil/namespaces.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/libutil/namespaces.cc') diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index 222f0d11b..b1cdbfe03 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -1,11 +1,11 @@ +#if __linux__ + #include "namespaces.hh" #include "util.hh" #include "finally.hh" #include <mntent.h> -#if __linux__ - namespace nix { bool userNamespacesSupported() -- cgit v1.2.3 From 0a70b411e1afaa22d8b01560de908246042daf10 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 7 Feb 2023 23:01:39 +0100 Subject: Print debug message if a namespace test fails --- src/libutil/namespaces.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'src/libutil/namespaces.cc') diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index b1cdbfe03..fdd52d92b 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -39,7 +39,12 @@ bool userNamespacesSupported() _exit(res ?
1 : 0); }); - return pid.wait() == 0; + bool supported = pid.wait() == 0; + + if (!supported) + debug("user namespaces do not work on this system"); + + return supported; }(); return res; } @@ -56,7 +61,12 @@ bool mountNamespacesSupported() _exit(res ? 1 : 0); }); - return pid.wait() == 0; + bool supported = pid.wait() == 0; + + if (!supported) + debug("mount namespaces do not work on this system"); + + return supported; }(); return res; } -- cgit v1.2.3 From f094ba7386fc5fbb3df5fd84008ca07d2289ff26 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Fri, 10 Feb 2023 14:38:14 +0100 Subject: Simplify the PID namespace check: just try to mount /proc Fixes #7783. --- src/libutil/namespaces.cc | 91 +++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 47 deletions(-) (limited to 'src/libutil/namespaces.cc') diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index fdd52d92b..f66accb10 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -4,7 +4,7 @@ #include "util.hh" #include "finally.hh" -#include <mntent.h> +#include <sys/mount.h> namespace nix { @@ -33,63 +33,60 @@ bool userNamespacesSupported() return false; } - Pid pid = startProcess([&]() - { - auto res = unshare(CLONE_NEWUSER); - _exit(res ? 1 : 0); - }); - - bool supported = pid.wait() == 0; - - if (!supported) - debug("user namespaces do not work on this system"); - - return supported; - }(); - return res; -} - -bool mountNamespacesSupported() -{ - static auto res = [&]() -> bool - { - bool useUserNamespace = userNamespacesSupported(); - - Pid pid = startProcess([&]() - { - auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0)); - _exit(res ?
1 : 0); - }); - - bool supported = pid.wait() == 0; - - if (!supported) - debug("mount namespaces do not work on this system"); + try { + Pid pid = startProcess([&]() + { + _exit(0); + }, { + .cloneFlags = CLONE_NEWUSER + }); + + auto r = pid.wait(); + assert(!r); + } catch (SysError & e) { + debug("user namespaces do not work on this system: %s", e.msg()); + return false; + } - return supported; + return true; }(); return res; } -bool pidNamespacesSupported() +bool mountAndPidNamespacesSupported() { static auto res = [&]() -> bool { - /* Check whether /proc is fully visible, i.e. there are no - filesystems mounted on top of files inside /proc. If this - is not the case, then we cannot mount a new /proc inside - the sandbox that matches the sandbox's PID namespace. - See https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */ - auto fp = fopen("/proc/mounts", "r"); - if (!fp) return false; - Finally delFP = [&]() { fclose(fp); }; - - while (auto ent = getmntent(fp)) - if (hasPrefix(std::string_view(ent->mnt_dir), "/proc/")) { - debug("PID namespaces do not work because /proc is not fully visible; disabling sandboxing"); + try { + + Pid pid = startProcess([&]() + { + /* Make sure we don't remount the parent's /proc. */ + if (mount(0, "/", 0, MS_PRIVATE | MS_REC, 0) == -1) + _exit(1); + + /* Test whether we can remount /proc. The kernel disallows + this if /proc is not fully visible, i.e. if there are + filesystems mounted on top of files inside /proc. See + https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */ + if (mount("none", "/proc", "proc", 0, 0) == -1) + _exit(2); + + _exit(0); + }, { + .cloneFlags = CLONE_NEWNS | CLONE_NEWPID | (userNamespacesSupported() ? 
CLONE_NEWUSER : 0) + }); + + if (pid.wait()) { + debug("PID namespaces do not work on this system: cannot remount /proc"); return false; } + } catch (SysError & e) { + debug("mount namespaces do not work on this system: %s", e.msg()); + return false; + } + return true; }(); return res; -- cgit v1.2.3