diff --git a/SPECS/containerd2/CVE-2026-47262.patch b/SPECS/containerd2/CVE-2026-47262.patch new file mode 100644 index 00000000000..01a5ff91223 --- /dev/null +++ b/SPECS/containerd2/CVE-2026-47262.patch @@ -0,0 +1,272 @@ +From 30708e8d1142287e9c6bb839f1b3f84c71ca4485 Mon Sep 17 00:00:00 2001 +From: Chris Henzie +Date: Fri, 15 May 2026 22:19:37 +0000 +Subject: [PATCH 1/5] Bound user-database file reads in openUserFile + +openUserFile now stats the opened file, refuses anything that is not a +regular file, and wraps the returned fs.File so reads are capped at +maxUserFileBytes (10 MiB). All callers of openUserFile read either +etc/passwd or etc/group; both are regular files on real systems, well +under the cap. + +The cap and the regular-file check together bound parser memory use +when reading user-database files of unexpected shape or size. + +Adds tests for the cap and for the non-regular file rejection. The cap +test covers three boundary points: a small pad (trailing entry parsed), +a pad placing the entry's last byte exactly on the cap (still parsed), +and a pad past the cap (read returns an "exceeds" error). + +Assisted-by: Antigravity +Signed-off-by: Chris Henzie +(cherry picked from commit 7b05ec421d0a07b33964c74145b6bf5dff58f476) +Signed-off-by: Chris Henzie +--- + pkg/oci/spec_opts.go | 56 +++++++++- + pkg/oci/spec_opts_user_bounds_test.go | 146 ++++++++++++++++++++++++++ + 2 files changed, 200 insertions(+), 2 deletions(-) + create mode 100644 pkg/oci/spec_opts_user_bounds_test.go + +diff --git a/pkg/oci/spec_opts.go b/pkg/oci/spec_opts.go +index c298e4bb2..c989552db 100644 +--- a/pkg/oci/spec_opts.go ++++ b/pkg/oci/spec_opts.go +@@ -24,6 +24,7 @@ import ( + "encoding/json" + "errors" + "fmt" ++ "io" + "io/fs" + "math" + "os" +@@ -1800,10 +1801,13 @@ type readLinker interface { + // openUserFile attempts to open a file within the root fs. 
+ // It handles cases where the file is an absolute symlink (e.g., NixOS /etc/passwd -> /nix/store/...), + // which triggers "path escapes from parent" errors in Go 1.24+ due to stricter os.DirFS validation. ++// ++// The returned file rejects non-regular sources and returns an error if more ++// than maxUserFileBytes are read from it. + func openUserFile(root fs.FS, name string) (fs.File, error) { + f, err := root.Open(name) + if err == nil { +- return f, nil ++ return wrapUserFile(f, name) + } + + // Check if the FS implements our local ReadLink interface. +@@ -1820,7 +1824,11 @@ func openUserFile(root fs.FS, name string) (fs.File, error) { + if rerr == nil { + // filepath.Rel might return OS-specific separators (backslashes on Windows). + // fs.Open strictly expects forward slashes, so we convert it. +- return root.Open(filepath.ToSlash(rel)) ++ f, oerr := root.Open(filepath.ToSlash(rel)) ++ if oerr != nil { ++ return nil, oerr ++ } ++ return wrapUserFile(f, name) + } + } + } +@@ -1829,3 +1837,47 @@ func openUserFile(root fs.FS, name string) (fs.File, error) { + // Return the original error if we couldn't resolve it + return nil, err + } ++ ++// maxUserFileBytes caps how much data is read from any user-database file ++// opened via openUserFile. Real systems keep these files well under 1 MiB; ++// 10 MiB is generous headroom while keeping peak memory during ++// user.ParsePasswd/ParseGroup bounded to single-digit MiB. ++const maxUserFileBytes = 10 << 20 ++ ++// wrapUserFile rejects non-regular sources and returns an fs.File that ++// errors out if more than maxUserFileBytes are read from it. 
++func wrapUserFile(f fs.File, name string) (fs.File, error) { ++ info, err := f.Stat() ++ if err != nil { ++ f.Close() ++ return nil, fmt.Errorf("stat %s: %w", name, err) ++ } ++ if !info.Mode().IsRegular() { ++ f.Close() ++ return nil, fmt.Errorf("%s is not a regular file", name) ++ } ++ return &limitedFile{ ++ File: f, ++ // Allow one byte past the cap so an overflow surfaces as an ++ // error rather than a silent EOF that the parser would treat as ++ // a clean end-of-file (and miss any entries past the cap). ++ r: &io.LimitedReader{R: f, N: maxUserFileBytes + 1}, ++ name: name, ++ }, nil ++} ++ ++// limitedFile is an fs.File whose Read returns an error once more than ++// maxUserFileBytes have been read. ++type limitedFile struct { ++ fs.File ++ r *io.LimitedReader ++ name string ++} ++ ++func (l *limitedFile) Read(p []byte) (int, error) { ++ n, err := l.r.Read(p) ++ if l.r.N == 0 { ++ return n, fmt.Errorf("%q exceeds %d bytes", l.name, maxUserFileBytes) ++ } ++ return n, err ++} +diff --git a/pkg/oci/spec_opts_user_bounds_test.go b/pkg/oci/spec_opts_user_bounds_test.go +new file mode 100644 +index 000000000..54384f79a +--- /dev/null ++++ b/pkg/oci/spec_opts_user_bounds_test.go +@@ -0,0 +1,146 @@ ++/* ++ Copyright The containerd Authors. ++ ++ Licensed under the Apache License, Version 2.0 (the "License"); ++ you may not use this file except in compliance with the License. ++ You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++ Unless required by applicable law or agreed to in writing, software ++ distributed under the License is distributed on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ See the License for the specific language governing permissions and ++ limitations under the License. 
++*/ ++ ++package oci ++ ++import ( ++ "bytes" ++ "errors" ++ "io/fs" ++ "testing" ++ "testing/fstest" ++ "time" ++ ++ "github.com/moby/sys/user" ++ "github.com/stretchr/testify/assert" ++) ++ ++// TestOpenUserFileCapsReads asserts the boundary behavior of the read cap: ++// well below, ending exactly at, and past maxUserFileBytes. ++func TestOpenUserFileCapsReads(t *testing.T) { ++ t.Parallel() ++ ++ beyond := []byte("\nbeyond:x:42:\n") ++ ++ for _, tc := range []struct { ++ name string ++ padBytes int ++ wantGids []uint32 ++ wantErr bool ++ }{ ++ { ++ name: "pad below cap, beyond is parsed", ++ padBytes: 100, ++ wantGids: []uint32{42}, ++ }, ++ { ++ name: "beyond ends exactly at cap, is parsed", ++ padBytes: maxUserFileBytes - len(beyond), ++ wantGids: []uint32{42}, ++ }, ++ { ++ name: "pad past cap, read errors out", ++ padBytes: maxUserFileBytes, ++ wantErr: true, ++ }, ++ } { ++ t.Run(tc.name, func(t *testing.T) { ++ t.Parallel() ++ ++ data := append(bytes.Repeat([]byte{0}, tc.padBytes), beyond...) ++ fsys := fstest.MapFS{ ++ "etc/group": &fstest.MapFile{Data: data, Mode: 0o644}, ++ } ++ ++ gids, err := getSupplementalGroupsFromFS(fsys, func(g user.Group) bool { ++ return g.Name == "beyond" ++ }) ++ if tc.wantErr { ++ assert.ErrorContains(t, err, "exceeds") ++ return ++ } ++ assert.NoError(t, err) ++ assert.Equal(t, tc.wantGids, gids) ++ }) ++ } ++} ++ ++// TestOpenUserFileRejectsNonRegularFiles verifies that non-regular files ++// are refused before any byte is read from them. 
++func TestOpenUserFileRejectsNonRegularFiles(t *testing.T) { ++ t.Parallel() ++ ++ for _, tc := range []struct { ++ name string ++ mode fs.FileMode ++ }{ ++ {name: "char device", mode: fs.ModeDevice | fs.ModeCharDevice | 0o666}, ++ {name: "socket", mode: fs.ModeSocket | 0o666}, ++ } { ++ t.Run(tc.name, func(t *testing.T) { ++ t.Parallel() ++ ++ f := &nonRegularFile{mode: tc.mode} ++ rootFS := singleFileFS{name: "etc/group", file: f} ++ ++ _, err := getSupplementalGroupsFromFS(rootFS, nil) ++ assert.Error(t, err) ++ assert.False(t, f.readCalled, "Read should not be called on non-regular file") ++ }) ++ } ++} ++ ++// nonRegularFile implements fs.File and reports a configurable non-regular ++// mode via Stat. ++type nonRegularFile struct { ++ mode fs.FileMode ++ readCalled bool ++} ++ ++func (f *nonRegularFile) Read([]byte) (int, error) { ++ f.readCalled = true ++ return 0, errors.New("read should not be called on non-regular file") ++} ++ ++func (f *nonRegularFile) Stat() (fs.FileInfo, error) { ++ return nonRegularFileInfo{mode: f.mode}, nil ++} ++func (f *nonRegularFile) Close() error { return nil } ++ ++type nonRegularFileInfo struct { ++ mode fs.FileMode ++} ++ ++func (nonRegularFileInfo) Name() string { return "group" } ++func (nonRegularFileInfo) Size() int64 { return 0 } ++func (i nonRegularFileInfo) Mode() fs.FileMode { return i.mode } ++func (nonRegularFileInfo) ModTime() time.Time { return time.Time{} } ++func (nonRegularFileInfo) IsDir() bool { return false } ++func (nonRegularFileInfo) Sys() any { return nil } ++ ++// singleFileFS routes a single name to a single fs.File and returns ++// fs.ErrNotExist for everything else. 
++type singleFileFS struct { ++ name string ++ file fs.File ++} ++ ++func (s singleFileFS) Open(name string) (fs.File, error) { ++ if name == s.name { ++ return s.file, nil ++ } ++ return nil, fs.ErrNotExist ++} +-- +2.54.0.1189.g8c84645362-goog + diff --git a/SPECS/containerd2/CVE-2026-50195.patch b/SPECS/containerd2/CVE-2026-50195.patch new file mode 100644 index 00000000000..0ad90d5f780 --- /dev/null +++ b/SPECS/containerd2/CVE-2026-50195.patch @@ -0,0 +1,67 @@ +From cff57884176a1e6ba0857a417753d799958e0f46 Mon Sep 17 00:00:00 2001 +From: Samuel Karp +Date: Tue, 26 May 2026 16:06:58 -0700 +Subject: [PATCH 2/5] cri: do not re-tag restored checkpoints + +Google-Bug-Id: 508657842 +Signed-off-by: Samuel Karp +(cherry picked from commit 0c0918fa8fb4d997f889a3d811603995a3a2b68a) +Signed-off-by: Samuel Karp +--- + .../checkpoint/checkpoint-restore-cri-test.sh | 4 ++-- + .../cri/server/container_checkpoint_linux.go | 17 ----------------- + 2 files changed, 2 insertions(+), 19 deletions(-) + +diff --git a/contrib/checkpoint/checkpoint-restore-cri-test.sh b/contrib/checkpoint/checkpoint-restore-cri-test.sh +index 54735db14..2b2a66388 100755 +--- a/contrib/checkpoint/checkpoint-restore-cri-test.sh ++++ b/contrib/checkpoint/checkpoint-restore-cri-test.sh +@@ -110,7 +110,7 @@ function test_from_archive() { + fi + # Cleanup + echo "--> Cleanup images: " +- crictl rmi "${TEST_IMAGE}" | sed 's/^/----> \t/' ++ (crictl rmi "${TEST_IMAGE}" || true) | sed 's/^/----> \t/' + echo -n "--> Verifying container rootfs: " + crictl exec "$ctr_id" ls -la /root/testfile + if crictl exec "$ctr_id" ls -la /etc/motd >/dev/null 2>&1; then +@@ -184,7 +184,7 @@ function test_from_oci() { + echo "--> Cleanup images: " + ../../bin/ctr -n k8s.io images rm localhost/checkpoint-image:latest | sed 's/^/----> \t/' + echo "--> Cleanup images: " +- crictl rmi "${TEST_IMAGE}" | sed 's/^/----> \t/' ++ (crictl rmi "${TEST_IMAGE}" || true) | sed 's/^/----> \t/' + echo "--> Deleting all pods: " + crictl -t 5s 
rmp -fa | sed 's/^/----> \t/' + SUCCESS=1 +diff --git a/internal/cri/server/container_checkpoint_linux.go b/internal/cri/server/container_checkpoint_linux.go +index b54963ae8..b122c392d 100644 +--- a/internal/cri/server/container_checkpoint_linux.go ++++ b/internal/cri/server/container_checkpoint_linux.go +@@ -331,23 +331,6 @@ func (c *criService) CRImportCheckpoint( + if _, err := reference.ParseAnyReference(config.RootfsImageName); err != nil { + return "", fmt.Errorf("error parsing reference: %q is not a valid repository/tag %v", config.RootfsImageName, err) + } +- tagImage, err := c.client.ImageService().Get(ctx, config.RootfsImageRef) +- if err != nil { +- return "", fmt.Errorf("failed to get checkpoint base image %s: %w", config.RootfsImageRef, err) +- } +- // Second step is to tag the image with the same tag it used to have +- // during checkpointing. For the error that the image NAME:TAG already +- // exists is ignored. It could happen that NAME:TAG now belongs to +- // another NAME@DIGEST than during checkpointing and the restore will +- // happen on another image. 
+- // TODO: handle if NAME:TAG points to a different NAME@DIGEST +- tagImage.Name = config.RootfsImageName +- _, err = c.client.ImageService().Create(ctx, tagImage) +- if err != nil { +- if !errdefs.IsAlreadyExists(err) { +- return "", fmt.Errorf("failed to tag checkpoint base image %s with %s: %w", config.RootfsImageRef, config.RootfsImageName, err) +- } +- } + + var image imagestore.Image + for i := 1; i < 500; i++ { +-- +2.54.0.1189.g8c84645362-goog + diff --git a/SPECS/containerd2/CVE-2026-53488.patch b/SPECS/containerd2/CVE-2026-53488.patch new file mode 100644 index 00000000000..c0f4c37fbd7 --- /dev/null +++ b/SPECS/containerd2/CVE-2026-53488.patch @@ -0,0 +1,336 @@ +From b6072a49f8d3f6efc5ac9895efbb1852b16a2602 Mon Sep 17 00:00:00 2001 +From: Ben Cressey +Date: Fri, 29 May 2026 21:33:28 +0000 +Subject: [PATCH 3/5] Do not propagate reserved labels from image configs + +Image config labels are copied onto the container by both the CRI +plugin (BuildLabels) and the client's WithImageConfigLabels option +used by `ctr run`. Labels in the containerd.io/* namespace are +interpreted by containerd itself and labels in the io.cri-containerd* +namespace are interpreted by the CRI plugin. An image config is not a +trusted source for labels in either namespace. + +Skip labels in both reserved namespaces when copying labels from an +image config to a container, and warn about each label skipped: an +image that tries to set them may be attempting to alter containerd +behavior. Oversized image labels are already skipped this way by +the CRI plugin. + +Labels set explicitly by clients, for example via `ctr run --label` +or in the CRI request, are unaffected. + +Verified with the CRI plugin and with `ctr run` against an image +whose config carries labels like these: the labels are no longer +present on the created container and a warning is logged for each. 
+ +Assisted-by: Claude Code +Signed-off-by: Ben Cressey +Signed-off-by: Samuel Karp +(cherry picked from commit 0ec1af4cae1256d18719ca892bf66340499e8050) +Signed-off-by: Akihiro Suda +Signed-off-by: Samuel Karp +--- + client/container_opts.go | 17 ++++++++ + client/container_opts_test.go | 75 ++++++++++++++++++++++++++++++++++ + internal/cri/labels/labels.go | 6 ++- + internal/cri/util/util.go | 12 +++++- + internal/cri/util/util_test.go | 16 ++++++-- + pkg/labels/labels.go | 12 ++++++ + pkg/labels/validate.go | 9 ++++ + pkg/labels/validate_test.go | 17 ++++++++ + 8 files changed, 158 insertions(+), 6 deletions(-) + create mode 100644 client/container_opts_test.go + +diff --git a/client/container_opts.go b/client/container_opts.go +index 04f2a9062..762dcb4b9 100644 +--- a/client/container_opts.go ++++ b/client/container_opts.go +@@ -21,14 +21,17 @@ import ( + "encoding/json" + "errors" + "fmt" ++ "maps" + + "github.com/containerd/containerd/v2/core/containers" + "github.com/containerd/containerd/v2/core/content" + "github.com/containerd/containerd/v2/core/images" + "github.com/containerd/containerd/v2/core/snapshots" ++ "github.com/containerd/containerd/v2/pkg/labels" + "github.com/containerd/containerd/v2/pkg/namespaces" + "github.com/containerd/containerd/v2/pkg/oci" + "github.com/containerd/errdefs" ++ "github.com/containerd/log" + "github.com/containerd/typeurl/v2" + "github.com/opencontainers/image-spec/identity" + v1 "github.com/opencontainers/image-spec/specs-go/v1" +@@ -113,6 +116,10 @@ func WithContainerLabels(labels map[string]string) NewContainerOpts { + // The existing labels are cleared as this is expected to be the first + // operation in setting up a container's labels. Use WithAdditionalContainerLabels + // to add/overwrite the existing image config labels. ++// ++// Image config labels in the namespaces reserved for containerd ++// (containerd.io/) and the CRI plugin (io.cri-containerd) are not copied ++// to the container. 
+ func WithImageConfigLabels(image Image) NewContainerOpts { + return func(ctx context.Context, _ *Client, c *containers.Container) error { + ic, err := image.Config(ctx) +@@ -138,6 +145,16 @@ func WithImageConfigLabels(image Image) NewContainerOpts { + config = ociimage.Config + + c.Labels = config.Labels ++ // Labels in the containerd.io/* namespace are interpreted by containerd ++ // itself, and labels in the io.cri-containerd.* namespace are interpreted ++ // by the CRI plugin, so they are not copied from untrusted image configs. ++ maps.DeleteFunc(c.Labels, func(k, _ string) bool { ++ if labels.IsReserved(k) { ++ log.G(ctx).Warnf("skipping image label %q: the label namespace is reserved for containerd; possible malicious image attempting to alter containerd behavior", k) ++ return true ++ } ++ return false ++ }) + return nil + } + } +diff --git a/client/container_opts_test.go b/client/container_opts_test.go +new file mode 100644 +index 000000000..fb01e6a33 +--- /dev/null ++++ b/client/container_opts_test.go +@@ -0,0 +1,75 @@ ++/* ++ Copyright The containerd Authors. ++ ++ Licensed under the Apache License, Version 2.0 (the "License"); ++ you may not use this file except in compliance with the License. ++ You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++ Unless required by applicable law or agreed to in writing, software ++ distributed under the License is distributed on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ See the License for the specific language governing permissions and ++ limitations under the License. 
++*/ ++ ++package client ++ ++import ( ++ "context" ++ "encoding/json" ++ "testing" ++ ++ "github.com/containerd/containerd/v2/core/containers" ++ "github.com/containerd/containerd/v2/core/content" ++ "github.com/opencontainers/go-digest" ++ ocispec "github.com/opencontainers/image-spec/specs-go/v1" ++ "github.com/stretchr/testify/assert" ++ "github.com/stretchr/testify/require" ++) ++ ++// fakeImage implements the subset of Image used by WithImageConfigLabels: ++// Config returns a descriptor with the config blob inlined in Data, so the ++// content store is never consulted. ++type fakeImage struct { ++ Image ++ config ocispec.Descriptor ++} ++ ++func (i fakeImage) Config(context.Context) (ocispec.Descriptor, error) { ++ return i.config, nil ++} ++ ++func (i fakeImage) ContentStore() content.Store { ++ return nil ++} ++ ++func TestWithImageConfigLabels(t *testing.T) { ++ blob, err := json.Marshal(ocispec.Image{ ++ Config: ocispec.ImageConfig{ ++ Labels: map[string]string{ ++ "foo": "bar", ++ "containerd.io/restart.policy": "always", ++ "io.cri-containerd.kind": "sandbox", ++ }, ++ }, ++ }) ++ require.NoError(t, err) ++ ++ img := fakeImage{ ++ config: ocispec.Descriptor{ ++ MediaType: ocispec.MediaTypeImageConfig, ++ Digest: digest.FromBytes(blob), ++ Size: int64(len(blob)), ++ Data: blob, ++ }, ++ } ++ ++ var c containers.Container ++ require.NoError(t, WithImageConfigLabels(img)(t.Context(), nil, &c)) ++ ++ // labels in the namespaces reserved for containerd and the CRI plugin ++ // are not copied from the image config ++ assert.Equal(t, map[string]string{"foo": "bar"}, c.Labels) ++} +diff --git a/internal/cri/labels/labels.go b/internal/cri/labels/labels.go +index c92b9e863..d76bb1409 100644 +--- a/internal/cri/labels/labels.go ++++ b/internal/cri/labels/labels.go +@@ -16,9 +16,13 @@ + + package labels + ++import ( ++ clabels "github.com/containerd/containerd/v2/pkg/labels" ++) ++ + const ( + // criContainerdPrefix is common prefix for cri-containerd +- 
criContainerdPrefix = "io.cri-containerd" ++ criContainerdPrefix = clabels.CRIContainerdPrefix + // ImageLabelKey is the label key indicating the image is managed by cri plugin. + ImageLabelKey = criContainerdPrefix + ".image" + // ImageLabelValue is the label value indicating the image is managed by cri plugin. +diff --git a/internal/cri/util/util.go b/internal/cri/util/util.go +index dbacd138a..fec4b3bfd 100644 +--- a/internal/cri/util/util.go ++++ b/internal/cri/util/util.go +@@ -81,11 +81,21 @@ func GetPassthroughAnnotations(podAnnotations map[string]string, + return passthroughAnnotations + } + +-// BuildLabels builds the labels from config to be passed to containerd ++// BuildLabels builds the labels from config to be passed to containerd. ++// Image config labels in the namespaces reserved for containerd ++// (containerd.io/) and the CRI plugin (io.cri-containerd) are not copied ++// to the container. + func BuildLabels(configLabels, imageConfigLabels map[string]string, containerType string) map[string]string { + labels := make(map[string]string) + + for k, v := range imageConfigLabels { ++ // Labels in the containerd.io/* namespace are interpreted by containerd ++ // itself, and labels in the io.cri-containerd.* namespace are interpreted ++ // by the CRI plugin, so they are not copied from untrusted image configs. 
++ if clabels.IsReserved(k) { ++ log.L.Warnf("skipping image label %q: the label namespace is reserved for containerd; possible malicious image attempting to alter containerd behavior", k) ++ continue ++ } + if err := clabels.Validate(k, v); err == nil { + labels[k] = v + } else { +diff --git a/internal/cri/util/util_test.go b/internal/cri/util/util_test.go +index 3974920f2..1664feb63 100644 +--- a/internal/cri/util/util_test.go ++++ b/internal/cri/util/util_test.go +@@ -145,21 +145,29 @@ func TestPassThroughAnnotationsFilter(t *testing.T) { + + func TestBuildLabels(t *testing.T) { + imageConfigLabels := map[string]string{ +- "a": "z", +- "d": "y", +- "long-label": strings.Repeat("example", 10000), ++ "a": "z", ++ "d": "y", ++ "long-label": strings.Repeat("example", 10000), ++ "containerd.io/restart.policy": "always", ++ "io.cri-containerd.image": "managed", + } + configLabels := map[string]string{ + "a": "b", + "c": "d", ++ // reserved namespaces are only filtered for image config labels, not ++ // for labels from the CRI request ++ "containerd.io/restart.status": "stopped", + } + newLabels := BuildLabels(configLabels, imageConfigLabels, crilabels.ContainerKindSandbox) +- assert.Len(t, newLabels, 4) ++ assert.Len(t, newLabels, 5) + assert.Equal(t, "b", newLabels["a"]) + assert.Equal(t, "d", newLabels["c"]) + assert.Equal(t, "y", newLabels["d"]) ++ assert.Equal(t, "stopped", newLabels["containerd.io/restart.status"]) + assert.Equal(t, crilabels.ContainerKindSandbox, newLabels[crilabels.ContainerKindLabel]) + assert.NotContains(t, newLabels, "long-label") ++ assert.NotContains(t, newLabels, "containerd.io/restart.policy") ++ assert.NotContains(t, newLabels, "io.cri-containerd.image") + + newLabels["a"] = "e" + assert.Empty(t, configLabels[crilabels.ContainerKindLabel], "should not add new labels into original label") +diff --git a/pkg/labels/labels.go b/pkg/labels/labels.go +index 0f9bab5c5..ba4c245e4 100644 +--- a/pkg/labels/labels.go ++++ b/pkg/labels/labels.go 
+@@ -16,6 +16,18 @@ + + package labels + ++// ReservedPrefix is the prefix of the label namespace reserved for labels ++// defined and consumed by containerd itself. Labels in this namespace must ++// not be copied from untrusted sources such as image config labels. Use ++// IsReserved to check for such labels. ++const ReservedPrefix = "containerd.io/" ++ ++// CRIContainerdPrefix is the prefix of the label namespace reserved for ++// labels defined and consumed by containerd's CRI plugin. Labels in this ++// namespace must not be copied from untrusted sources such as image config ++// labels. Use IsReserved to check for such labels. ++const CRIContainerdPrefix = "io.cri-containerd" ++ + // LabelUncompressed is added to compressed layer contents. + // The value is digest of the uncompressed content. + const LabelUncompressed = "containerd.io/uncompressed" +diff --git a/pkg/labels/validate.go b/pkg/labels/validate.go +index 6f23cdd7c..495427bb4 100644 +--- a/pkg/labels/validate.go ++++ b/pkg/labels/validate.go +@@ -18,6 +18,7 @@ package labels + + import ( + "fmt" ++ "strings" + + "github.com/containerd/errdefs" + ) +@@ -39,3 +40,11 @@ func Validate(k, v string) error { + } + return nil + } ++ ++// IsReserved returns true if the label key is in a namespace reserved for ++// containerd (ReservedPrefix) or its CRI plugin (CRIContainerdPrefix). ++// Reserved labels are interpreted by containerd and must not be copied from ++// untrusted sources such as image config labels. 
++func IsReserved(k string) bool { ++ return strings.HasPrefix(k, ReservedPrefix) || strings.HasPrefix(k, CRIContainerdPrefix) ++} +diff --git a/pkg/labels/validate_test.go b/pkg/labels/validate_test.go +index 16be11df3..fb97e5b69 100644 +--- a/pkg/labels/validate_test.go ++++ b/pkg/labels/validate_test.go +@@ -53,6 +53,23 @@ func TestInvalidLabels(t *testing.T) { + } + } + ++func TestIsReserved(t *testing.T) { ++ for key, reserved := range map[string]bool{ ++ "containerd.io/": true, ++ "containerd.io/restart.status": true, ++ "containerd.io/gc.ref.content": true, ++ "io.cri-containerd": true, ++ "io.cri-containerd.kind": true, ++ "io.cri-containerd.image": true, ++ "io.cri-containerdfoo": true, ++ "containerd.io": false, ++ "io.containerd.something": false, ++ "com.example.app": false, ++ } { ++ assert.Equal(t, reserved, IsReserved(key), "IsReserved(%q)", key) ++ } ++} ++ + func TestLongKey(t *testing.T) { + key := strings.Repeat("s", keyMaxLen+1) + value := strings.Repeat("v", maxSize-len(key)) +-- +2.54.0.1189.g8c84645362-goog + diff --git a/SPECS/containerd2/CVE-2026-53489.patch b/SPECS/containerd2/CVE-2026-53489.patch new file mode 100644 index 00000000000..800a30e8459 --- /dev/null +++ b/SPECS/containerd2/CVE-2026-53489.patch @@ -0,0 +1,516 @@ +From 357652293053d0cd3ed565f718b0050aa662ae1a Mon Sep 17 00:00:00 2001 +From: Brian Goff +Date: Tue, 9 Jun 2026 11:37:18 -0700 +Subject: [PATCH 5/5] cri: make checkpoint restore robust to unexpected archive + content + +The CRI checkpoint restore path unpacked checkpoint archive/OCI image content +directly into the container's persistent state directory and read files such as +container.log back from it with a symlink-following copy. Checkpoint content is +externally provided, so make restore more defensive about what it unpacks and +how it reads those files back. + +Behavior changes: + +- Only unpack regular files and directories from the checkpoint archive. 
+ +- Unpack checkpoint content into a dedicated /ctrd-restore + subdirectory created fresh rather than into the state dir itself, so + checkpoint content cannot collide with containerd's own files (e.g. + the "status" blob). Restore and cleanup operate on that subdir; + cleanup is now a single RemoveAll of it. + +Signed-off-by: Brian Goff +(cherry picked from commit 8196411f24065533093be4c7ad874c23b06178f3) +Signed-off-by: Brian Goff +Signed-off-by: Henry Beberman +--- + .../cri/server/container_checkpoint_linux.go | 186 ++++++++++++++---- + .../server/container_checkpoint_linux_test.go | 130 ++++++++++++++++ + internal/cri/server/container_start.go | 35 ++-- + 3 files changed, 286 insertions(+), 65 deletions(-) + +diff --git a/internal/cri/server/container_checkpoint_linux.go b/internal/cri/server/container_checkpoint_linux.go +index b54963ae8..ab1364407 100644 +--- a/internal/cri/server/container_checkpoint_linux.go ++++ b/internal/cri/server/container_checkpoint_linux.go +@@ -31,7 +31,6 @@ import ( + "time" + + crmetadata "github.com/checkpoint-restore/checkpointctl/lib" +- "github.com/checkpoint-restore/go-criu/v7/stats" + "github.com/checkpoint-restore/go-criu/v7/utils" + "github.com/containerd/containerd/api/types/runc/options" + "github.com/containerd/containerd/v2/client" +@@ -56,9 +55,86 @@ import ( + "github.com/opencontainers/image-spec/identity" + v1 "github.com/opencontainers/image-spec/specs-go/v1" + spec "github.com/opencontainers/runtime-spec/specs-go" ++ "golang.org/x/sys/unix" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ++ ++ // TODO: This package import is kept to prevent merge conflicts while integrating multiple ++ // branches, specifically because this changes vendoring. ++ _ "github.com/checkpoint-restore/go-criu/v7/stats" + ) + ++// copyNoFollow copies the regular file at src to dst without following a symlink ++// at the final path component of src. 
++// ++// The checkpoint code reads files (container.log, status, stats-dump, dump.log) ++// out of the container state directory, which can contain entries unpacked from a ++// checkpoint archive or OCI image. Those entries are externally provided, so they ++// are read defensively. ++// ++// src is first lstat'd (which does not follow a final-component symlink) and must ++// be a regular file; non-regular entries are rejected before src is ever opened. ++// src is then opened with O_NOFOLLOW as a belt-and-suspenders guard in case the ++// entry changes type between the lstat and the open. ++func copyNoFollow(src, dst string, perm os.FileMode) error { ++ fi, err := os.Lstat(src) ++ if err != nil { ++ return err ++ } ++ if !fi.Mode().IsRegular() { ++ return fmt.Errorf("refusing to copy %s: not a regular file", src) ++ } ++ ++ in, err := os.OpenFile(src, os.O_RDONLY|unix.O_NOFOLLOW, 0) ++ if err != nil { ++ return err ++ } ++ defer in.Close() ++ ++ out, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm) ++ if err != nil { ++ return err ++ } ++ defer out.Close() ++ ++ _, err = io.Copy(out, in) ++ return err ++} ++ ++// checkpointArchiveEntryAllowed reports whether a tar entry from a checkpoint ++// archive may be unpacked. Legitimate checkpoint archives contain only regular ++// files and directories; other entry types (symlinks, hardlinks, device and fifo ++// nodes) are not produced by the checkpoint code and are rejected as a hardening ++// measure. ++func checkpointArchiveEntryAllowed(hdr *tar.Header) bool { ++ switch hdr.Typeflag { ++ //nolint:staticcheck // TypeRegA is deprecated but we may still receive an external tar with TypeRegA ++ case tar.TypeReg, tar.TypeRegA, tar.TypeDir, tar.TypeXGlobalHeader: ++ return true ++ default: ++ return false ++ } ++} ++ ++// assertCheckpointDirSafe verifies that the populated restore directory contains ++// only regular files and directories. 
++// ++// The OCI-image restore path copies checkpoint content into the restore dir with ++// fs.CopyDir, which (unlike the tar unpack filter) faithfully recreates any ++// symlinks and special files present in the image. Restore-time consumers open ++// paths under this directory, so non-regular entries are rejected before they run. ++func assertCheckpointDirSafe(root string) error { ++ return filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { ++ if err != nil { ++ return err ++ } ++ // d.Type() reports the entry type without following symlinks. ++ if d.IsDir() || d.Type().IsRegular() { ++ return nil ++ } ++ return fmt.Errorf("refusing to restore checkpoint: %s is not a regular file or directory", path) ++ }) ++} ++ + // checkIfCheckpointOCIImage returns checks if the input refers to a checkpoint image. + // It returns the StorageImageID of the image the input resolves to, nil otherwise. + func (c *criService) checkIfCheckpointOCIImage(ctx context.Context, input string) (string, error) { +@@ -187,6 +263,12 @@ func (c *criService) CRImportCheckpoint( + }(archiveFile) + + filter := archive.WithFilter(func(hdr *tar.Header) (bool, error) { ++ // Reject entry types the checkpoint code never produces (symlinks, ++ // hardlinks, device/fifo nodes) so they are not recreated on disk. ++ if !checkpointArchiveEntryAllowed(hdr) { ++ log.G(ctx).Warnf("Skipping unexpected checkpoint archive entry %q (type %d)", hdr.Name, hdr.Typeflag) ++ return false, nil ++ } + // The checkpoint archive is unpacked twice if using a tar file directly. + // The first time only the metadata files are relevant to prepare the + // restore operation. 
This filter function ignores the large parts of +@@ -410,16 +492,39 @@ func (c *criService) CRImportCheckpoint( + return "", err + } + ++ // Confine all checkpoint content to a dedicated subdirectory of the container ++ // state dir instead of unpacking it directly into the state dir, so it cannot ++ // collide with containerd's own files there. Create it fresh; RemoveAll unlinks ++ // any pre-existing entry without following it. ++ restoreDir := filepath.Join(containerRootDir, checkpointRestoreDir) ++ if err := os.RemoveAll(restoreDir); err != nil { ++ return "", err ++ } ++ if err := os.Mkdir(restoreDir, 0o700); err != nil { ++ return "", err ++ } ++ + if restoreStorageImageID != "" { +- if err := fs.CopyDir(containerRootDir, mountPoint); err != nil { ++ if err := fs.CopyDir(restoreDir, mountPoint); err != nil { + return "", err + } + if err := mount.UnmountAll(mountPoint, 0); err != nil { + return "", err + } ++ // fs.CopyDir recreates any symlinks/special files from the image; reject ++ // them here so restore-time consumers only ever open regular files. ++ if err := assertCheckpointDirSafe(restoreDir); err != nil { ++ return "", err ++ } + } else { + // unpack the checkpoint archive + filter := archive.WithFilter(func(hdr *tar.Header) (bool, error) { ++ // Reject entry types the checkpoint code never produces (symlinks, ++ // hardlinks, device/fifo nodes) so they are not recreated on disk. ++ if !checkpointArchiveEntryAllowed(hdr) { ++ log.G(ctx).Warnf("Skipping unexpected checkpoint archive entry %q (type %d)", hdr.Name, hdr.Typeflag) ++ return false, nil ++ } + excludePatterns := []string{ + crmetadata.ConfigDumpFile, + crmetadata.SpecDumpFile, +@@ -437,19 +542,21 @@ func (c *criService) CRImportCheckpoint( + + // Start from the beginning of the checkpoint archive + archiveFile.Seek(0, 0) +- _, err = archive.Apply(ctx, containerRootDir, archiveFile, []archive.ApplyOpt{filter}...) 
++ _, err = archive.Apply(ctx, restoreDir, archiveFile, []archive.ApplyOpt{filter}...) + + if err != nil { +- return "", fmt.Errorf("unpacking of checkpoint archive %s failed: %w", containerRootDir, err) ++ return "", fmt.Errorf("unpacking of checkpoint archive %s failed: %w", restoreDir, err) + } + } +- log.G(ctx).Debugf("Unpacked checkpoint in %s", containerRootDir) ++ log.G(ctx).Debugf("Unpacked checkpoint in %s", restoreDir) + +- // Restore container log file (if it exists) +- containerLog := filepath.Join(containerRootDir, "container.log") +- _, err = c.os.Stat(containerLog) +- if err == nil { +- if err := c.os.CopyFile(containerLog, meta.LogPath, 0600); err != nil { ++ // Restore container log file (if it exists). ++ // ++ // container.log was unpacked from a checkpoint archive/OCI image, so it is ++ // copied without following a final-component symlink. ++ containerLog := filepath.Join(restoreDir, "container.log") ++ if err := copyNoFollow(containerLog, meta.LogPath, 0600); err != nil { ++ if !errors.Is(err, os.ErrNotExist) { + return "", fmt.Errorf("restoring container log file %s failed: %w", containerLog, err) + } + } +@@ -517,7 +624,25 @@ func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.Checkpo + if err != nil { + return nil, fmt.Errorf("failed to get task for container %q: %w", r.GetContainerId(), err) + } +- img, err := task.Checkpoint(ctx, []client.CheckpointTaskOpts{withCheckpointOpts(i.Runtime.Name, c.getContainerRootDir(r.GetContainerId()))}...) ++ ++ cpPath := filepath.Join(c.getContainerRootDir(container.ID), "ctrd-checkpoint") ++ // ctrd-checkpoint may already exist from a prior checkpoint operation. RemoveAll ++ // unlinks any existing entry (including a symlink) itself rather than its target, ++ // so creating the directory afterwards cannot write through a link. 
++ if err := os.RemoveAll(cpPath); err != nil { ++ return nil, err ++ } ++ if err := os.Mkdir(cpPath, 0o700); err != nil { ++ return nil, err ++ } ++ defer os.RemoveAll(cpPath) ++ ++ // Point CRIU's work directory (where it writes dump.log and stats-dump) at the ++ // dedicated, freshly-created checkpoint dir instead of the persistent container ++ // state dir. Otherwise checkpoint creation litters those files into the state ++ // dir where they are never cleaned up; here they land directly where they are ++ // archived from and are removed with cpPath. ++ img, err := task.Checkpoint(ctx, []client.CheckpointTaskOpts{withCheckpointOpts(i.Runtime.Name, cpPath)}...) + if err != nil { + return nil, fmt.Errorf("checkpointing container %q failed: %w", r.GetContainerId(), err) + } +@@ -542,43 +667,20 @@ func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.Checkpo + return nil, fmt.Errorf("failed to unmarshall blob into checkpoint data OCI index: %w", err) + } + +- cpPath := filepath.Join(c.getContainerRootDir(r.GetContainerId()), "ctrd-checkpoint") +- if err := os.MkdirAll(cpPath, 0o700); err != nil { +- return nil, err +- } +- defer os.RemoveAll(cpPath) +- +- // This internal containerd file is used by checkpointctl for +- // checkpoint archive analysis. +- if err := c.os.CopyFile( +- filepath.Join(c.getContainerRootDir(r.GetContainerId()), crmetadata.StatusFile), ++ // This internal containerd file is used by checkpointctl for checkpoint archive ++ // analysis. It lives in the container state dir, which can hold files from a ++ // prior checkpoint operation, so it is read without following symlinks. ++ if err := copyNoFollow( ++ filepath.Join(c.getContainerRootDir(container.ID), crmetadata.StatusFile), + filepath.Join(cpPath, crmetadata.StatusFile), + 0o600, + ); err != nil { + return nil, err + } + +- // This file is created by CRIU and includes timing analysis. 
+- // Also used by checkpointctl +- if err := c.os.CopyFile( +- filepath.Join(c.getContainerRootDir(r.GetContainerId()), stats.StatsDump), +- filepath.Join(cpPath, stats.StatsDump), +- 0o600, +- ); err != nil { +- return nil, err +- } +- +- // The log file created by CRIU. This file could be missing. +- // Let's ignore errors if the file is missing. +- if err := c.os.CopyFile( +- filepath.Join(c.getContainerRootDir(r.GetContainerId()), crmetadata.DumpLogFile), +- filepath.Join(cpPath, crmetadata.DumpLogFile), +- 0o600, +- ); err != nil { +- if !errors.Is(errors.Unwrap(err), os.ErrNotExist) { +- return nil, err +- } +- } ++ // dump.log and stats-dump are written directly into cpPath by CRIU via its ++ // work directory (see withCheckpointOpts above), so they are already present ++ // for archiving and do not need to be copied out of the container state dir. + + // Save the existing container log file + _, err = c.os.Stat(criContainerStatus.GetStatus().GetLogPath()) +diff --git a/internal/cri/server/container_checkpoint_linux_test.go b/internal/cri/server/container_checkpoint_linux_test.go +index 3010e5233..740a52100 100644 +--- a/internal/cri/server/container_checkpoint_linux_test.go ++++ b/internal/cri/server/container_checkpoint_linux_test.go +@@ -19,13 +19,19 @@ + package server + + import ( ++ "archive/tar" + "context" ++ "errors" ++ "os" ++ "path/filepath" + "sync" + "testing" + + "github.com/containerd/log" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" ++ "github.com/stretchr/testify/require" ++ "golang.org/x/sys/unix" + ) + + type testLogHook struct { +@@ -105,3 +111,127 @@ + }) + } + } ++ ++func TestCopyNoFollowRegularFile(t *testing.T) { ++ dir := t.TempDir() ++ src := filepath.Join(dir, "src") ++ dst := filepath.Join(dir, "dst") ++ require.NoError(t, os.WriteFile(src, []byte("hello"), 0o644)) ++ ++ require.NoError(t, copyNoFollow(src, dst, 0o600)) ++ ++ data, err := os.ReadFile(dst) ++ require.NoError(t, err) ++ assert.Equal(t, 
"hello", string(data)) ++ ++ info, err := os.Stat(dst) ++ require.NoError(t, err) ++ assert.Equal(t, os.FileMode(0o600), info.Mode().Perm()) ++} ++ ++func TestCopyNoFollowMissingSource(t *testing.T) { ++ dir := t.TempDir() ++ src := filepath.Join(dir, "does-not-exist") ++ dst := filepath.Join(dir, "dst") ++ ++ err := copyNoFollow(src, dst, 0o600) ++ require.Error(t, err) ++ assert.True(t, errors.Is(err, os.ErrNotExist), "expected ErrNotExist, got %v", err) ++ assert.NoFileExists(t, dst) ++} ++ ++func TestCopyNoFollowSymlinkSourceNotFollowed(t *testing.T) { ++ dir := t.TempDir() ++ ++ // A stand-in for a file outside the copy that a symlink might point at. ++ secret := filepath.Join(dir, "outside-target") ++ require.NoError(t, os.WriteFile(secret, []byte("outside-content"), 0o600)) ++ ++ src := filepath.Join(dir, "container.log") ++ require.NoError(t, os.Symlink(secret, src)) ++ dst := filepath.Join(dir, "dst") ++ ++ err := copyNoFollow(src, dst, 0o600) ++ require.Error(t, err) ++ // A symlink is not a "missing file"; it must surface as an error, not be skipped. ++ assert.False(t, errors.Is(err, os.ErrNotExist)) ++ // And the linked-to content must not have been copied into the destination. ++ assert.NoFileExists(t, dst) ++} ++ ++func TestCopyNoFollowRejectsFIFO(t *testing.T) { ++ dir := t.TempDir() ++ src := filepath.Join(dir, "fifo") ++ require.NoError(t, unix.Mkfifo(src, 0o600)) ++ dst := filepath.Join(dir, "dst") ++ ++ // Must return promptly with an error rather than blocking on the FIFO open. 
++ err := copyNoFollow(src, dst, 0o600) ++ require.Error(t, err) ++ assert.False(t, errors.Is(err, os.ErrNotExist)) ++ assert.NoFileExists(t, dst) ++} ++ ++func TestCopyNoFollowRejectsDirectory(t *testing.T) { ++ dir := t.TempDir() ++ src := filepath.Join(dir, "adir") ++ require.NoError(t, os.Mkdir(src, 0o700)) ++ dst := filepath.Join(dir, "dst") ++ ++ err := copyNoFollow(src, dst, 0o600) ++ require.Error(t, err) ++ assert.NoFileExists(t, dst) ++} ++ ++func TestAssertCheckpointDirSafe(t *testing.T) { ++ t.Run("regular files and dirs allowed", func(t *testing.T) { ++ root := t.TempDir() ++ require.NoError(t, os.MkdirAll(filepath.Join(root, "checkpoint"), 0o700)) ++ require.NoError(t, os.WriteFile(filepath.Join(root, "checkpoint", "img"), []byte("x"), 0o600)) ++ require.NoError(t, os.WriteFile(filepath.Join(root, "rootfs-diff.tar"), []byte("x"), 0o600)) ++ assert.NoError(t, assertCheckpointDirSafe(root)) ++ }) ++ ++ t.Run("symlink rejected", func(t *testing.T) { ++ root := t.TempDir() ++ require.NoError(t, os.Symlink("/some/outside/path", filepath.Join(root, "rootfs-diff.tar"))) ++ assert.Error(t, assertCheckpointDirSafe(root)) ++ }) ++ ++ t.Run("symlink nested in subdir rejected", func(t *testing.T) { ++ root := t.TempDir() ++ require.NoError(t, os.MkdirAll(filepath.Join(root, "checkpoint"), 0o700)) ++ require.NoError(t, os.Symlink("/some/outside/path", filepath.Join(root, "checkpoint", "pages-1.img"))) ++ assert.Error(t, assertCheckpointDirSafe(root)) ++ }) ++ ++ t.Run("fifo rejected", func(t *testing.T) { ++ root := t.TempDir() ++ require.NoError(t, unix.Mkfifo(filepath.Join(root, "fifo"), 0o600)) ++ assert.Error(t, assertCheckpointDirSafe(root)) ++ }) ++} ++ ++func TestCheckpointArchiveEntryAllowed(t *testing.T) { ++ for _, tc := range []struct { ++ name string ++ typ byte ++ allowed bool ++ }{ ++ {"regular", tar.TypeReg, true}, ++ //nolint:staticcheck // TypeRegA is deprecated but external tars may still use it ++ {"regular-A", tar.TypeRegA, true}, ++ 
{"directory", tar.TypeDir, true}, ++ {"global-header", tar.TypeXGlobalHeader, true}, ++ {"symlink", tar.TypeSymlink, false}, ++ {"hardlink", tar.TypeLink, false}, ++ {"char-device", tar.TypeChar, false}, ++ {"block-device", tar.TypeBlock, false}, ++ {"fifo", tar.TypeFifo, false}, ++ } { ++ t.Run(tc.name, func(t *testing.T) { ++ got := checkpointArchiveEntryAllowed(&tar.Header{Typeflag: tc.typ, Name: tc.name}) ++ assert.Equal(t, tc.allowed, got) ++ }) ++ } ++} +diff --git a/internal/cri/server/container_start.go b/internal/cri/server/container_start.go +index c0fb8c50f..0c7653cad 100644 +--- a/internal/cri/server/container_start.go ++++ b/internal/cri/server/container_start.go +@@ -31,7 +31,6 @@ import ( + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + + crmetadata "github.com/checkpoint-restore/checkpointctl/lib" +- "github.com/checkpoint-restore/go-criu/v7/stats" + containerd "github.com/containerd/containerd/v2/client" + cio "github.com/containerd/containerd/v2/internal/cri/io" + containerstore "github.com/containerd/containerd/v2/internal/cri/store/container" +@@ -41,6 +40,12 @@ import ( + cioutil "github.com/containerd/containerd/v2/pkg/ioutil" + ) + ++// checkpointRestoreDir is the subdirectory under a container's persistent state ++// directory into which checkpoint content (CRIU images, container.log, ++// rootfs-diff.tar, ...) is unpacked during restore. Confining it here keeps ++// checkpoint content from colliding with containerd's own files in the state dir. ++const checkpointRestoreDir = "ctrd-restore" ++ + // StartContainer starts the container. 
+ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) { + span := tracing.SpanFromContext(ctx) +@@ -112,7 +117,7 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain + pid, err := container.Restore( + ctx, + ioCreation, +- filepath.Join(c.getContainerRootDir(r.GetContainerId()), crmetadata.CheckpointDirectory), ++ filepath.Join(c.getContainerRootDir(r.GetContainerId()), checkpointRestoreDir, crmetadata.CheckpointDirectory), + ) + + if err != nil { +@@ -155,28 +160,12 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain + // It handles the TaskExit event and update container state after this. + c.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh) + +- // cleanup checkpoint artifacts after restore +- cleanup := [...]string{ +- crmetadata.RestoreLogFile, +- crmetadata.DumpLogFile, +- stats.StatsDump, +- stats.StatsRestore, +- crmetadata.NetworkStatusFile, +- crmetadata.RootFsDiffTar, +- crmetadata.DeletedFilesFile, +- crmetadata.CheckpointDirectory, +- crmetadata.StatusDumpFile, +- crmetadata.ConfigDumpFile, +- crmetadata.SpecDumpFile, +- "container.log", +- } +- for _, del := range cleanup { +- file := filepath.Join(c.getContainerRootDir(r.GetContainerId()), del) +- err = os.RemoveAll(file) +- if err != nil { +- log.G(ctx).Infof("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err) +- } ++ // cleanup checkpoint artifacts after restore. 
++ restoreDir := filepath.Join(c.getContainerRootDir(r.GetContainerId()), checkpointRestoreDir) ++ if err := os.RemoveAll(restoreDir); err != nil { ++ log.G(ctx).Warnf("Non-fatal: removal of checkpoint restore dir (%s) failed: %v", restoreDir, err) + } ++ + log.G(ctx).Infof("Restored container %s successfully", r.GetContainerId()) + return &runtime.StartContainerResponse{}, nil + } +-- +2.54.0.1189.g8c84645362-goog diff --git a/SPECS/containerd2/CVE-2026-53492.patch b/SPECS/containerd2/CVE-2026-53492.patch new file mode 100644 index 00000000000..8a727ea777e --- /dev/null +++ b/SPECS/containerd2/CVE-2026-53492.patch @@ -0,0 +1,268 @@ +From be8460656b84c4a1a4b244a03801e9fff1e914d3 Mon Sep 17 00:00:00 2001 +From: Samuel Karp +Date: Mon, 8 Jun 2026 00:58:16 +0000 +Subject: [PATCH 4/5] cri: filter CDI annotations on checkpoint restore + +Filter out any annotations on the checkpointed container matching +`cdi.k8s.io/` or exactly `cdi.k8s.io` during restore to prevent +unauthorized device restoration. When an annotation is denied, a warning +log is generated. 
+ +Tested by: +* Unit tests for exact matching, prefix boundaries, and metadata merging +* Complete CRI integration and checkpoint restore suite + +Assisted-by: Antigravity +Signed-off-by: Samuel Karp +(cherry picked from commit 861ffc1097685f9ecf13adaa381aca5fdf7ef0b4) +Signed-off-by: Chris Henzie +--- + .../checkpoint/checkpoint-restore-cri-test.sh | 22 +++- + .../cri/server/container_checkpoint_linux.go | 56 ++++++--- + .../server/container_checkpoint_linux_test.go | 107 ++++++++++++++++++ + 3 files changed, 167 insertions(+), 18 deletions(-) + create mode 100644 internal/cri/server/container_checkpoint_linux_test.go + +diff --git a/contrib/checkpoint/checkpoint-restore-cri-test.sh b/contrib/checkpoint/checkpoint-restore-cri-test.sh +index 54735db14..82197fa76 100755 +--- a/contrib/checkpoint/checkpoint-restore-cri-test.sh ++++ b/contrib/checkpoint/checkpoint-restore-cri-test.sh +@@ -56,6 +56,15 @@ TESTDATA=testdata + # shellcheck disable=SC2034 + export CONTAINERD_ADDRESS="$TESTDIR/c.sock" + export CONTAINER_RUNTIME_ENDPOINT="unix:///${CONTAINERD_ADDRESS}" ++ ++# Generate crictl config file with 30s timeout ++export CRI_CONFIG_FILE="${TESTDIR}/crictl.yaml" ++cat <<EOF > "${CRI_CONFIG_FILE}" ++runtime-endpoint: unix://${CONTAINERD_ADDRESS} ++image-endpoint: unix://${CONTAINERD_ADDRESS} ++timeout: 30 ++EOF ++ + TEST_IMAGE=ghcr.io/containerd/alpine + + function test_from_archive() { +@@ -69,9 +78,12 @@ function test_from_archive() { + echo -n "--> Start pod: " + pod_id=$(crictl runp "$POD_JSON") + echo "$pod_id" ++ CTR_JSON=$(mktemp) ++ jq '.annotations = {"cdi.k8s.io/device":"gpu","safe.annotation":"true"}' "$TESTDATA"/container_sleep.json >"$CTR_JSON" + echo -n "--> Create container: " +- ctr_id=$(crictl create "$pod_id" "$TESTDATA"/container_sleep.json "$POD_JSON") ++ ctr_id=$(crictl create "$pod_id" "$CTR_JSON" "$POD_JSON") + echo "$ctr_id" ++ rm -f "$CTR_JSON" + echo -n "--> Start container: " + crictl start "$ctr_id" + lines_before=$(crictl logs "$ctr_id" | wc -l)
+@@ -108,6 +120,12 @@ function test_from_archive() { + "should be larger than before checkpointing ($lines_before)" + false + fi ++ echo "--> Verifying CDI annotation filtering on restore: " ++ actual_annots=$(crictl inspect "$ctr_id" | jq -c '.status.annotations') ++ if jq -e 'has("cdi.k8s.io/device") or (has("safe.annotation") | not)' <<<"$actual_annots" >/dev/null; then ++ echo "error: CDI annotation was not filtered or safe annotation missing: $actual_annots" ++ exit 1 ++ fi + # Cleanup + echo "--> Cleanup images: " + crictl rmi "${TEST_IMAGE}" | sed 's/^/----> \t/' +@@ -192,6 +210,8 @@ function test_from_oci() { + + cat >"${TESTDIR}/config.toml" < - 2.2.4-3 +- Patch for CVE-2026-50195, CVE-2026-53488, CVE-2026-53492, CVE-2026-53489, CVE-2026-47262 + * Sat May 30 2026 Jon Slobodzian - 2.2.4-2 - Resolve merge from fasttrack, bring patches for CVE-2026-42506, CVE-2026-39821, CVE-2026-27136 forward to 2.2.4 version of containerd2.