Skip to content

Commit faec5d4

Browse files
committed
runtime: should not send duplicate task exit event
If the shim has been killed and ttrpc connection has been closed, the shimErr will not be nil. For this case, the event subscriber, like moby/moby, might have received the exit or delete events. Just in case, we should allow ttrpc-callback-on-close to send the exit and delete events again. And the exit status will depend on result of shimV2.Delete. If not, the shim has been delivered the exit and delete events. So we should remove the task record and prevent duplicate events from ttrpc-callback-on-close. Fix: containerd#4769 Signed-off-by: Wei Fu <fuweid89@gmail.com>
1 parent cb2dcb0 commit faec5d4

File tree

3 files changed

+125
-1
lines changed

3 files changed

+125
-1
lines changed

container_linux_test.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,128 @@ import (
3636

3737
"github.com/containerd/cgroups"
3838
cgroupsv2 "github.com/containerd/cgroups/v2"
39+
apievents "github.com/containerd/containerd/api/events"
3940
"github.com/containerd/containerd/cio"
4041
"github.com/containerd/containerd/containers"
4142
"github.com/containerd/containerd/errdefs"
43+
"github.com/containerd/containerd/images"
44+
"github.com/containerd/containerd/log/logtest"
45+
"github.com/containerd/containerd/namespaces"
4246
"github.com/containerd/containerd/oci"
4347
"github.com/containerd/containerd/plugin"
4448
"github.com/containerd/containerd/runtime/linux/runctypes"
4549
"github.com/containerd/containerd/runtime/v2/runc/options"
4650
"github.com/containerd/containerd/sys"
51+
"github.com/containerd/typeurl"
4752
specs "github.com/opencontainers/runtime-spec/specs-go"
53+
"github.com/pkg/errors"
4854
"golang.org/x/sys/unix"
4955
)
5056

57+
// TestRegressionIssue4769 verifies the number of task exit events.
58+
//
59+
// Issue: https://github.com/containerd/containerd/issues/4769.
60+
func TestRegressionIssue4769(t *testing.T) {
61+
t.Parallel()
62+
63+
client, err := newClient(t, address)
64+
if err != nil {
65+
t.Fatal(err)
66+
}
67+
defer client.Close()
68+
69+
// use unique namespace to get unique task events
70+
id := t.Name()
71+
ns := fmt.Sprintf("%s-%s", testNamespace, id)
72+
73+
ctx, cancel := context.WithCancel(context.Background())
74+
defer cancel()
75+
76+
ctx = namespaces.WithNamespace(ctx, ns)
77+
ctx = logtest.WithT(ctx, t)
78+
79+
image, err := client.Pull(ctx, testImage, WithPullUnpack)
80+
if err != nil {
81+
t.Fatal(err)
82+
}
83+
defer client.ImageService().Delete(ctx, testImage, images.SynchronousDelete())
84+
85+
container, err := client.NewContainer(ctx, id,
86+
WithNewSnapshot(id, image),
87+
WithNewSpec(oci.WithImageConfig(image), withTrue()),
88+
WithRuntime(client.runtime, nil),
89+
)
90+
if err != nil {
91+
t.Fatal(err)
92+
}
93+
defer container.Delete(ctx, WithSnapshotCleanup)
94+
95+
task, err := container.NewTask(ctx, empty())
96+
if err != nil {
97+
t.Fatal(err)
98+
}
99+
defer task.Delete(ctx)
100+
101+
statusC, err := task.Wait(ctx)
102+
if err != nil {
103+
t.Fatal(err)
104+
}
105+
106+
eventStream, errC := client.EventService().Subscribe(ctx, "namespace=="+ns+",topic~=|^/tasks/exit|")
107+
108+
if err := task.Start(ctx); err != nil {
109+
t.Fatal(err)
110+
}
111+
112+
var timeout = 3 * time.Second
113+
114+
select {
115+
case et := <-statusC:
116+
if got := et.ExitCode(); got != 0 {
117+
t.Fatal(errors.Errorf("expect zero exit status, but got %v", got))
118+
}
119+
case <-time.After(timeout):
120+
t.Fatal(fmt.Errorf("failed to get exit event in time"))
121+
}
122+
123+
// start to check events
124+
select {
125+
case et := <-eventStream:
126+
if et.Event == nil {
127+
t.Fatal(errors.Errorf("unexpect empty event: %+v", et))
128+
}
129+
130+
v, err := typeurl.UnmarshalAny(et.Event)
131+
if err != nil {
132+
t.Fatal(errors.Wrap(err, "failed to unmarshal event"))
133+
}
134+
135+
if e, ok := v.(*apievents.TaskExit); !ok {
136+
t.Fatal(errors.Errorf("unexpect event type: %+v", v))
137+
} else if e.ExitStatus != 0 {
138+
t.Fatal(errors.Errorf("expect zero exit status, but got %v", e.ExitStatus))
139+
}
140+
case err := <-errC:
141+
t.Fatal(errors.Wrap(err, "unexpected error from event service"))
142+
143+
case <-time.After(timeout):
144+
t.Fatal(fmt.Errorf("failed to get exit event in time"))
145+
}
146+
147+
if _, err := task.Delete(ctx); err != nil {
148+
t.Fatal(err)
149+
}
150+
151+
// check duplicate event should not show up
152+
select {
153+
case event := <-eventStream:
154+
t.Fatal(errors.Errorf("unexpected exit event: %+v", event))
155+
case err := <-errC:
156+
t.Fatal(errors.Wrap(err, "unexpected error from event service"))
157+
case <-time.After(timeout):
158+
}
159+
}
160+
51161
func TestTaskUpdate(t *testing.T) {
52162
t.Parallel()
53163

container_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ import (
3030
"testing"
3131
"time"
3232

33-
// Register the typeurl
3433
"github.com/containerd/containerd/cio"
3534
"github.com/containerd/containerd/containers"
3635
"github.com/containerd/containerd/namespaces"

runtime/v2/shim.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,21 @@ func (s *shim) Delete(ctx context.Context) (*runtime.Exit, error) {
240240
}
241241
}
242242
}
243+
244+
// NOTE: If the shim has been killed and ttrpc connection has been
245+
// closed, the shimErr will not be nil. For this case, the event
246+
// subscriber, like moby/moby, might have received the exit or delete
247+
// events. Just in case, we should allow ttrpc-callback-on-close to
248+
// send the exit and delete events again. And the exit status will
249+
// depend on result of shimV2.Delete.
250+
//
251+
// If not, the shim has been delivered the exit and delete events.
252+
// So we should remove the record and prevent duplicate events from
253+
// ttrpc-callback-on-close.
254+
if shimErr == nil {
255+
s.rtTasks.Delete(ctx, s.ID())
256+
}
257+
243258
if err := s.waitShutdown(ctx); err != nil {
244259
log.G(ctx).WithField("id", s.ID()).WithError(err).Error("failed to shutdown shim")
245260
}

0 commit comments

Comments
 (0)