Skip to content

Commit e0f8c04

Browse files
committed
cri: Devices ownership from SecurityContext
CRI container runtimes mount devices (set via kubernetes device plugins) to containers by taking the host user/group IDs (uid/gid) to the corresponding container device. This triggers a problem when trying to run those containers with non-zero (root uid/gid = 0) uid/gid set via runAsUser/runAsGroup: the container process has no permission to use the device even when its gid is permissive to non-root users because the container user does not belong to that group. It is possible to workaround the problem by manually adding the device gid(s) to supplementalGroups. However, this is also problematic because the device gid(s) may have different values depending on the workers' distro/version in the cluster. This patch suggests to take RunAsUser/RunAsGroup set via SecurityContext as the device UID/GID, respectively. The feature must be enabled by setting device_ownership_from_security_context runtime config value to true (valid on Linux only). Signed-off-by: Mikko Ylinen <mikko.ylinen@intel.com>
1 parent af1a090 commit e0f8c04

File tree

5 files changed

+115
-4
lines changed

5 files changed

+115
-4
lines changed

oci/spec_opts_unix.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func WithHostDevices(_ context.Context, _ Client, _ *containers.Container, s *Sp
3737
return nil
3838
}
3939

40-
// WithDevices recursively adds devices from the passed in path and associated cgroup rules for that device.
40+
// WithDevices recursively adds devices from the passed in path.
4141
// If devicePath is a dir it traverses the dir to add all devices in that dir.
4242
// If devicePath is not a dir, it attempts to add the single device.
4343
func WithDevices(devicePath, containerPath, permissions string) SpecOpts {

pkg/cri/config/config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ type PluginConfig struct {
258258
// present in /sys/fs/cgroup/cgroup.controllers.
259259
// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
260260
DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
261+
// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
262+
// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
263+
DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
261264
// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
262265
// isolation, security and early detection of issues in the mount configuration when using
263266
// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.

pkg/cri/opts/spec_linux.go

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,30 @@ func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, erro
285285
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
286286
}
287287

288+
// getDeviceUserGroupID() is used to find the right uid/gid
289+
// value for the device node created in the container namespace.
290+
// The runtime executes mknod() and chmod()s the created
291+
// device with the values returned here.
292+
//
293+
// On Linux, uid and gid are sufficient and the user/groupname do not
294+
// need to be resolved.
295+
//
296+
// TODO(mythi): In case of user namespaces, the runtime simply bind
297+
// mounts the devices from the host. Additional logic is needed
298+
// to check that the runtimes effective UID/GID on the host has the
299+
// permissions to access the device node and/or the right user namespace
300+
// mappings are created.
301+
//
302+
// Ref: https://github.com/kubernetes/kubernetes/issues/92211
303+
func getDeviceUserGroupID(runAsVal *runtime.Int64Value) uint32 {
304+
if runAsVal != nil {
305+
return uint32(runAsVal.GetValue())
306+
}
307+
return 0
308+
}
309+
288310
// WithDevices sets the provided devices onto the container spec
289-
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
311+
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig, enableDeviceOwnershipFromSecurityContext bool) oci.SpecOpts {
290312
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
291313
if s.Linux == nil {
292314
s.Linux = &runtimespec.Linux{}
@@ -295,6 +317,8 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
295317
s.Linux.Resources = &runtimespec.LinuxResources{}
296318
}
297319

320+
oldDevices := len(s.Linux.Devices)
321+
298322
for _, device := range config.GetDevices() {
299323
path, err := osi.ResolveSymbolicLink(device.HostPath)
300324
if err != nil {
@@ -306,6 +330,24 @@ func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOp
306330
return err
307331
}
308332
}
333+
334+
if enableDeviceOwnershipFromSecurityContext {
335+
UID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsUser())
336+
GID := getDeviceUserGroupID(config.GetLinux().GetSecurityContext().GetRunAsGroup())
337+
// Loop all new devices added by oci.WithDevices() to update their
338+
// dev.UID/dev.GID.
339+
//
340+
// non-zero UID/GID from SecurityContext is used to override host's
341+
// device UID/GID for the container.
342+
for idx := oldDevices; idx < len(s.Linux.Devices); idx++ {
343+
if UID != 0 {
344+
*s.Linux.Devices[idx].UID = UID
345+
}
346+
if GID != 0 {
347+
*s.Linux.Devices[idx].GID = GID
348+
}
349+
}
350+
}
309351
return nil
310352
}
311353
}

pkg/cri/server/container_create_linux.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,11 +222,11 @@ func (c *criService) containerSpec(
222222
specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
223223
} else {
224224
// add requested devices by the config as host devices are not automatically added
225-
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
225+
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
226226
customopts.WithCapabilities(securityContext, c.allCaps))
227227
}
228228
} else { // not privileged
229-
specOpts = append(specOpts, customopts.WithDevices(c.os, config),
229+
specOpts = append(specOpts, customopts.WithDevices(c.os, config, c.config.DeviceOwnershipFromSecurityContext),
230230
customopts.WithCapabilities(securityContext, c.allCaps))
231231
}
232232

pkg/cri/server/container_create_linux_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,6 +1304,72 @@ func TestGenerateUserString(t *testing.T) {
13041304
}
13051305
}
13061306

1307+
func TestNonRootUserAndDevices(t *testing.T) {
1308+
testPid := uint32(1234)
1309+
c := newTestCRIService()
1310+
testSandboxID := "sandbox-id"
1311+
testContainerName := "container-name"
1312+
containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
1313+
1314+
hostDevicesRaw, err := oci.HostDevices()
1315+
assert.NoError(t, err)
1316+
1317+
testDevice := hostDevicesRaw[0]
1318+
1319+
for desc, test := range map[string]struct {
1320+
uid, gid *runtime.Int64Value
1321+
deviceOwnershipFromSecurityContext bool
1322+
expectedDeviceUID uint32
1323+
expectedDeviceGID uint32
1324+
}{
1325+
"expect non-root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
1326+
uid: &runtime.Int64Value{Value: 1},
1327+
gid: &runtime.Int64Value{Value: 10},
1328+
expectedDeviceUID: *testDevice.UID,
1329+
expectedDeviceGID: *testDevice.GID,
1330+
},
1331+
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is disabled": {
1332+
uid: &runtime.Int64Value{Value: 0},
1333+
gid: &runtime.Int64Value{Value: 0},
1334+
expectedDeviceUID: *testDevice.UID,
1335+
expectedDeviceGID: *testDevice.GID,
1336+
},
1337+
"expect non-root container's Devices Uid/Gid to be the same as RunAsUser/RunAsGroup when deviceOwnershipFromSecurityContext is enabled": {
1338+
uid: &runtime.Int64Value{Value: 1},
1339+
gid: &runtime.Int64Value{Value: 10},
1340+
deviceOwnershipFromSecurityContext: true,
1341+
expectedDeviceUID: 1,
1342+
expectedDeviceGID: 10,
1343+
},
1344+
"expect root container's Devices Uid/Gid to be the same as the device Uid/Gid on the host when deviceOwnershipFromSecurityContext is enabled": {
1345+
uid: &runtime.Int64Value{Value: 0},
1346+
gid: &runtime.Int64Value{Value: 0},
1347+
deviceOwnershipFromSecurityContext: true,
1348+
expectedDeviceUID: *testDevice.UID,
1349+
expectedDeviceGID: *testDevice.GID,
1350+
},
1351+
} {
1352+
t.Logf("TestCase %q", desc)
1353+
1354+
c.config.DeviceOwnershipFromSecurityContext = test.deviceOwnershipFromSecurityContext
1355+
containerConfig.Linux.SecurityContext.RunAsUser = test.uid
1356+
containerConfig.Linux.SecurityContext.RunAsGroup = test.gid
1357+
containerConfig.Devices = []*runtime.Device{
1358+
{
1359+
ContainerPath: testDevice.Path,
1360+
HostPath: testDevice.Path,
1361+
Permissions: "r",
1362+
},
1363+
}
1364+
1365+
spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, testImageName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
1366+
assert.NoError(t, err)
1367+
1368+
assert.Equal(t, test.expectedDeviceUID, *spec.Linux.Devices[0].UID)
1369+
assert.Equal(t, test.expectedDeviceGID, *spec.Linux.Devices[0].GID)
1370+
}
1371+
}
1372+
13071373
func TestPrivilegedDevices(t *testing.T) {
13081374
testPid := uint32(1234)
13091375
c := newTestCRIService()

0 commit comments

Comments
 (0)